diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..f1b763a --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,5 @@ +# Default owner +* @0xCCF4 + +# Github workflows +/.github/ @0xCCF4 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 21c8124..8622ffa 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -14,14 +14,13 @@ jobs: archive: zip - target: x86_64-unknown-linux-musl archive: tar.gz - - target: x86_64-apple-darwin - archive: zip steps: - uses: actions/checkout@master - uses: actions-rust-lang/setup-rust-toolchain@v1.8.0 with: toolchain: "stable" + target: ${{ matrix.target }} - name: Build run: cargo build --verbose diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7618268..0583e6b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -11,7 +11,10 @@ env: jobs: test: - + strategy: + matrix: + features: ["", "hash-sha1,hash-sha2,hash-xxh"] + runs-on: ubuntu-latest steps: @@ -21,5 +24,9 @@ jobs: with: toolchain: "stable" - - name: Build with stable toolchain - run: cargo build --verbose + - name: Build feature set ${{ matrix.features }} + run: cargo build --no-default-features --features "${{ matrix.features }}" + + - name: Test feature set ${{ matrix.features }} + run: cargo test --no-default-features --features "${{ matrix.features }}" + diff --git a/Cargo.lock b/Cargo.lock index b802725..8b53470 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,9 +4,9 @@ version = 3 [[package]] name = "aho-corasick" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", ] @@ -61,16 +61,17 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" [[package]] name = "backup-deduplicator" -version = "0.1.0" +version = "0.3.0" dependencies = [ "anyhow", "clap", + "const_format", "env_logger", "exitcode", "file-id", @@ -81,7 +82,6 @@ dependencies = [ "serde_json", "sha1", "sha2", - "sysinfo", "xxhash-rust", ] @@ -168,45 +168,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f7e3352a27098ba6b09546e5f13b15165e6a88b5c2723afecb3ea9576b27e3ea" [[package]] -name = "core-foundation-sys" -version = "0.8.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" - -[[package]] -name = "cpufeatures" -version = "0.2.12" +name = "const_format" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" dependencies = [ - "libc", + "const_format_proc_macros", ] [[package]] -name = "crossbeam-deque" -version = "0.8.5" +name = "const_format_proc_macros" +version = "0.2.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" dependencies 
= [ - "crossbeam-epoch", - "crossbeam-utils", + "proc-macro2", + "quote", + "unicode-xid", ] [[package]] -name = "crossbeam-epoch" -version = "0.9.18" +name = "cpufeatures" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ - "crossbeam-utils", + "libc", ] -[[package]] -name = "crossbeam-utils" -version = "0.8.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" - [[package]] name = "crypto-common" version = "0.1.6" @@ -249,12 +238,6 @@ dependencies = [ "crypto-common 0.2.0-pre.5", ] -[[package]] -name = "either" -version = "1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" - [[package]] name = "env_filter" version = "0.1.0" @@ -317,9 +300,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.12" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" +checksum = "94b22e06ecb0110981051723910cbf0b5f5e09a2062dd7663334ee79a9d1286c" dependencies = [ "cfg-if", "libc", @@ -346,18 +329,18 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "hybrid-array" -version = "0.2.0-rc.7" +version = "0.2.0-rc.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87c2311a0adecbffff284aabcf1249b1485193b16e685f9ef171b1ba82979cff" +checksum = "53668f5da5a41d9eaf4bf7064be46d1ebe6a4e1ceed817f387587b18f2b51047" dependencies = [ "typenum", ] [[package]] name = "itoa" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" [[package]] name = "libc" @@ -373,18 +356,9 @@ checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "memchr" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" - -[[package]] -name = "ntapi" -version = "0.4.1" +version = "2.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" -dependencies = [ - "winapi", -] +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" [[package]] name = "num_cpus" @@ -396,12 +370,6 @@ dependencies = [ "libc", ] -[[package]] -name = "once_cell" -version = "1.19.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" - [[package]] name = "proc-macro2" version = "1.0.79" @@ -413,9 +381,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] @@ -429,26 +397,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "rayon" -version 
= "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" -dependencies = [ - "either", - "rayon-core", -] - -[[package]] -name = "rayon-core" -version = "1.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" -dependencies = [ - "crossbeam-deque", - "crossbeam-utils", -] - [[package]] name = "redox_syscall" version = "0.4.1" @@ -460,9 +408,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", @@ -483,9 +431,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" [[package]] name = "ryu" @@ -548,36 +496,21 @@ dependencies = [ [[package]] name = "strsim" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee073c9e4cd00e28217186dbe12796d692868f432bf2e97ee73bed0c56dfa01" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.52" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] -[[package]] -name = "sysinfo" -version = "0.30.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c385888ef380a852a16209afc8cfad22795dd8873d69c9a14d2e2088f118d18" -dependencies = [ - "cfg-if", - "core-foundation-sys", - "libc", - "ntapi", - "once_cell", - "rayon", - "windows", -] - [[package]] name = "typenum" version = "1.17.0" @@ -590,6 +523,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "unicode-xid" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" + [[package]] name = "utf8parse" version = "0.2.1" @@ -608,47 +547,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" -dependencies = [ - "windows-core", - "windows-targets 0.52.4", -] - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.4", -] - [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 30f4375..452768a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "backup-deduplicator" -version = "0.1.0" +version = "0.3.0" edition = "2021" description = """ A tool to deduplicate backups. It builds a hash tree of all files and folders @@ -11,21 +11,26 @@ keywords = ["archive-management", "file", "deduplication", "cleanup"] license = "GPL-3.0-or-later" homepage = "https://github.com/0xCCF4/BackupDeduplicator" repository = "https://github.com/0xCCF4/BackupDeduplicator" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +documentation = "https://docs.rs/backup-deduplicator" [dependencies] -anyhow = "1.0.80" +anyhow = "1.0.82" clap = { version = "4.5.4", features = ["derive"] } env_logger = "0.11.2" log = "0.4.20" filetime = "0.2.23" exitcode = "1.1.2" -sha2 = "0.10.8" serde = { version = "1.0.197", features = ["derive", "rc"] } serde_json = "1.0.115" -file-id = "0.2.1" -sysinfo = "0.30.5" num_cpus = "1.16.0" -xxhash-rust = { version = "0.8.10", features = ["xxh32", "xxh64"] } -sha1 = "0.11.0-pre.3" +file-id = "0.2.1" +xxhash-rust = { version = "0.8.10", features = ["xxh32", "xxh64"], optional = true } +sha1 = { version = "0.11.0-pre.3", optional = true } +sha2 = { version = "0.10.8", optional = true } +const_format = "0.2.32" + +[features] +hash-sha1 = ["dep:sha1"] +hash-sha2 = ["dep:sha2"] +hash-xxh = ["dep:xxhash-rust"] +default = ["hash-sha1", "hash-sha2", "hash-xxh"] diff --git a/README.md b/README.md index 5e1b24f..3f68892 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,12 @@ The tool is a command line tool. There are two stages: `build` and `analyze`. ### Build Exemplary usage to build a hash tree of a directory: ```bash -backup-deduplicator --threads 16 build -w /parent -o /parent/hash.bdd /parent/target +backup-deduplicator + --threads 16 + build + --working-directory /parent + --output /parent/hash.bdd + /parent/target ``` This will build a hash tree of the directory `/path/to/parent/target` and save it to `hash.bdd` in the parent directory. The tool will use 16 threads to split the hash @@ -62,7 +67,10 @@ calculation work. ### Analyze Exemplary usage to analyze a hash tree: ```bash -backup-deduplicator analyze -o /parent/analysis.bdd /parent/hash.bdd +backup-deduplicator + analyze + --output /parent/analysis.bdd + /parent/hash.bdd ``` This will analyze the hash tree in `hash.bdd` and save the analysis result to `analysis.bdd`. The analysis file will then contain a list of JSON objects (one per line), @@ -76,8 +84,18 @@ The tool is written in Rust, and can be installed using `cargo`: cargo install backup-deduplicator ``` +Precompiled binaries are available for download on the release page +. 
+ +## Features Flags +The tool uses the rust features flags to enable or disable certain features. +The following flags are available: +* `hash-sha1`: Use the [sha1](https://crates.io/crates/sha1) module to enable SHA1 hash function +* `hash-sha2`: Use the [sha2](https://crates.io/crates/sha2) module to enable SHA512, SHA256 hash functions +* `hash-xxh`: Use the [xxhash-rust](https://crates.io/crates/xxhash-rust) module to enable XXH3 (32/64) hash functions + ## Contribution -Contributions to PhotoSort are welcome! If you have a feature request, +Contributions to the project are welcome! If you have a feature request, bug report, or want to contribute to the code, please open an issue or a pull request. diff --git a/src/cmd/analyze/analysis.rs b/src/cmd/analyze/analysis.rs deleted file mode 100644 index cb386af..0000000 --- a/src/cmd/analyze/analysis.rs +++ /dev/null @@ -1,69 +0,0 @@ -use std::sync::Weak; -use std::sync::{Arc, Mutex}; -use serde::{Deserialize, Serialize}; -use crate::data::{FilePath, GeneralHash, SaveFileEntryType}; - -#[derive(Debug, Serialize, Deserialize)] -pub enum AnalysisFile { - File(FileInformation), - Directory(DirectoryInformation), - Symlink(SymlinkInformation), - Other(OtherInformation), -} - -impl AnalysisFile { - pub fn parent(&self) -> &Mutex>> { - match self { - AnalysisFile::File(info) => &info.parent, - AnalysisFile::Directory(info) => &info.parent, - AnalysisFile::Symlink(info) => &info.parent, - AnalysisFile::Other(info) => &info.parent, - } - } - - pub fn path(&self) -> &FilePath { - match self { - AnalysisFile::File(info) => &info.path, - AnalysisFile::Directory(info) => &info.path, - AnalysisFile::Symlink(info) => &info.path, - AnalysisFile::Other(info) => &info.path, - } - } -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct FileInformation { - pub path: FilePath, - pub content_hash: GeneralHash, - pub parent: Mutex>>, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct DirectoryInformation { - pub path: FilePath, - pub content_hash: GeneralHash, - pub children: Mutex>>, - pub parent: Mutex>>, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct SymlinkInformation { - pub path: FilePath, - pub content_hash: GeneralHash, - pub parent: Mutex>>, -} - -#[derive(Debug, Serialize, Deserialize)] -pub struct OtherInformation { - pub path: FilePath, - pub parent: Mutex>>, -} - - -#[derive(Debug, Serialize)] -pub struct ResultEntryRef<'a, 'b, 'c> { - pub ftype: &'a SaveFileEntryType, - pub size: u64, - pub hash: &'b GeneralHash, - pub conflicting: Vec<&'c FilePath>, -} diff --git a/src/cmd/build/worker/other.rs b/src/cmd/build/worker/other.rs deleted file mode 100644 index a6e8d32..0000000 --- a/src/cmd/build/worker/other.rs +++ /dev/null @@ -1,33 +0,0 @@ -use std::path::PathBuf; -use std::sync::mpsc::Sender; -use log::trace; -use crate::build::JobResult; -use crate::build::worker::{worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; -use crate::data::{File, Job, OtherInformation, SaveFileEntryType}; - -pub fn worker_run_other(path: PathBuf, modified: u64, size: u64, id: usize, job: Job, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { - trace!("[{}] analyzing other {} > {:?}", id, &job.target_path, path); - - match worker_fetch_savedata(arg, &job.target_path) { - Some(found) => { - if found.file_type == SaveFileEntryType::Other && found.modified == modified && found.size == size { - trace!("Other {:?} is already in save file", path); - worker_publish_result_or_trigger_parent(id, 
true, File::Other(OtherInformation { - path: job.target_path.clone(), - content_size: size, - modified, - }), job, result_publish, job_publish, arg); - return; - } - } - None => {} - } - - let file = File::Other(OtherInformation { - path: job.target_path.clone(), - content_size: size, - modified, - }); - - worker_publish_result_or_trigger_parent(id, false, file, job, result_publish, job_publish, arg); -} \ No newline at end of file diff --git a/src/data/file.rs b/src/data/file.rs deleted file mode 100644 index ac22b58..0000000 --- a/src/data/file.rs +++ /dev/null @@ -1,109 +0,0 @@ -use std::path::{PathBuf}; -use serde::{Deserialize, Serialize}; -use crate::data::{FilePath, GeneralHash}; - -// type ResolveNodeFn = fn(&HandleIdentifier) -> Result>>; -// type PathInScopeFn = fn(&Path) -> bool; - - - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct FileInformation { - pub path: FilePath, - pub modified: u64, - pub content_hash: GeneralHash, - pub content_size: u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DirectoryInformation { - pub path: FilePath, - pub modified: u64, - pub content_hash: GeneralHash, - pub number_of_children: u64, - pub children: Vec, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SymlinkInformation { - pub path: FilePath, - pub modified: u64, - pub content_hash: GeneralHash, // equal to the target file's hash or if not following symlinks, the symlink's path hashed - pub target: PathBuf, - pub content_size: u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct OtherInformation { - pub path: FilePath, - pub modified: u64, - pub content_size: u64, -} - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct StubInformation { - pub path: FilePath, - pub content_hash: GeneralHash, -} - - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum File { - File(FileInformation), - Directory(DirectoryInformation), - Symlink(SymlinkInformation), - Other(OtherInformation), // for unsupported file types like block devices, character devices, etc., or files without permission - Stub(StubInformation), // for files that are already analyzed -} - -// ---- IMPLEMENTATION ---- - -impl File { - pub fn get_content_hash(&self) -> &GeneralHash { - match self { - File::File(info) => &info.content_hash, - File::Directory(info) => &info.content_hash, - File::Symlink(info) => &info.content_hash, - File::Other(_) => &GeneralHash::NULL, - File::Stub(info) => &info.content_hash, - } - } - - pub fn get_path(&self) -> &FilePath { - match self { - File::File(info) => &info.path, - File::Directory(info) => &info.path, - File::Symlink(info) => &info.path, - File::Other(info) => &info.path, - File::Stub(info) => &info.path, - } - } - - pub fn is_directory(&self) -> bool { - match self { - File::Directory(_) => true, - _ => false, - } - } - - pub fn is_symlink(&self) -> bool { - match self { - File::Symlink(_) => true, - _ => false, - } - } - - pub fn is_file(&self) -> bool { - match self { - File::File(_) => true, - _ => false, - } - } - - pub fn is_other(&self) -> bool { - match self { - File::Other(_) => true, - _ => false, - } - } -} diff --git a/src/data/fileid.rs b/src/data/fileid.rs index ea89264..4414e70 100644 --- a/src/data/fileid.rs +++ b/src/data/fileid.rs @@ -6,41 +6,63 @@ use std::path::Path; use file_id::FileId; use serde::Serialize; +/// Device id type. #[cfg(target_family = "unix")] type DeviceIdType = u64; +/// Device id type. 
#[cfg(target_family = "windows")] type DeviceIdType = u64; // high-res file-id -// file id - +/// File id type #[cfg(target_family = "unix")] type FileIdType = u64; +/// File id type #[cfg(target_family = "windows")] type FileIdType = u128; // high-res file-id -// structs - +/// A file id handle. +/// +/// # Fields +/// * `inode` - The inode of the file. +/// * `drive` - The device id of the file. #[derive(Debug, Clone, PartialEq, Serialize)] pub struct HandleIdentifier { pub inode: FileIdType, pub drive: DeviceIdType, } -pub fn from_path(path: impl AsRef) -> io::Result { - match file_id::get_file_id(path)? { - FileId::Inode { device_id, inode_number } => Ok(HandleIdentifier { - inode: inode_number as FileIdType, - drive: device_id as DeviceIdType, - }), - FileId::LowRes { volume_serial_number, file_index } => Ok(HandleIdentifier { - inode: file_index as FileIdType, - drive: volume_serial_number as DeviceIdType, - }), - FileId::HighRes { volume_serial_number, file_id } => Ok(HandleIdentifier { - inode: file_id as FileIdType, // path windows only -> no downcast will happen - drive: volume_serial_number as DeviceIdType, - }), +impl HandleIdentifier { + /// Create a new handle identifier from a path. + /// + /// # Arguments + /// * `path` - The path to the file. + /// + /// # Returns + /// The handle identifier. + /// + /// # Errors + /// If the file id cannot be retrieved. + pub fn from_path(path: impl AsRef) -> io::Result { + match file_id::get_file_id(path)? { + FileId::Inode { device_id, inode_number } => Ok(HandleIdentifier { + // unix + inode: inode_number as FileIdType, + drive: device_id as DeviceIdType, + }), + FileId::LowRes { volume_serial_number, file_index } => Ok(HandleIdentifier { + // windows + inode: file_index as FileIdType, + drive: volume_serial_number as DeviceIdType, + }), + FileId::HighRes { volume_serial_number, file_id } => Ok(HandleIdentifier { + // windows + inode: file_id as FileIdType, + drive: volume_serial_number as DeviceIdType, + }), + } } } + + diff --git a/src/data/hash.rs b/src/data/hash.rs index 62103e7..63fad7a 100644 --- a/src/data/hash.rs +++ b/src/data/hash.rs @@ -1,79 +1,251 @@ use std::fmt; use std::fmt::Display; +use std::path::Path; use std::str::FromStr; use serde::{Deserialize, Serialize, Serializer}; use serde::de::Error; +use const_format::concatcp; +use crate::stages::build::intermediary_build_data::BuildFile; +use crate::path::FilePath; +#[cfg(any(feature = "hash-sha2", feature = "hash-sha1", feature = "hash-xxh"))] use crate::utils; + +/// `GeneralHashType` is an enum that represents the different types of hash functions that can be used. +/// +/// The following hash functions are supported: SHA512, SHA256, SHA1, XXH64, XXH32, and NULL. +/// +/// The `hasher` method returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type. +/// The `hasher` can then be used to compute a hash of that kind. +/// +/// # Traits +/// * `FromStr` - to allow parsing a string into a `GeneralHashType`. +/// * `Display` - to allow formatting a `GeneralHashType` into a string. 
+/// +/// # Examples +/// ``` +/// use std::str::FromStr; +/// use backup_deduplicator::hash::GeneralHashType; +/// +/// #[cfg(feature = "hash-sha2")] +/// { +/// let hash_type = GeneralHashType::from_str("SHA256").unwrap(); +/// let mut hasher = hash_type.hasher(); +/// hasher.update(b"Hello, world!".as_slice()); +/// +/// assert_eq!(hash_type.to_string(), "SHA256"); +/// +/// let hash = hasher.finalize(); +/// assert_eq!(hash.to_string(), "SHA256:315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3"); +/// assert_eq!(hash_type, GeneralHashType::SHA256); +/// } +/// +/// ``` +/// +/// # See also +/// * [GeneralHash] - representation of a hash value. +/// * [GeneralHasher] - trait for computing hash values. +/// +/// # Features +/// * `hash-sha2` - enables the SHA512 and SHA256 hash functions. +/// * `hash-sha1` - enables the SHA1 hash function. +/// * `hash-xxh` - enables the XXH64 and XXH32 hash functions. #[derive(Debug, Hash, PartialEq, Clone, Copy, Serialize, Deserialize)] pub enum GeneralHashType { + #[cfg(feature = "hash-sha2")] SHA512, + #[cfg(feature = "hash-sha2")] SHA256, + #[cfg(feature = "hash-sha1")] SHA1, + #[cfg(feature = "hash-xxh")] XXH64, + #[cfg(feature = "hash-xxh")] XXH32, NULL, } impl GeneralHashType { + /// Returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type. + /// The `hasher` can then be used to compute a hash of that kind. + /// + /// # Returns + /// A new instance of a `GeneralHasher` trait object. + /// + /// # Examples + /// See the example in the `GeneralHashType` documentation. + /// + /// # Features + /// * `hash-sha2` - enables the SHA512 and SHA256 hash functions. + /// * `hash-sha1` - enables the SHA1 hash function. + /// * `hash-xxh` - enables the XXH64 and XXH32 hash functions. pub fn hasher(&self) -> Box { match self { + #[cfg(feature = "hash-sha2")] GeneralHashType::SHA512 => Box::new(sha2::Sha512Hasher::new()), + #[cfg(feature = "hash-sha2")] GeneralHashType::SHA256 => Box::new(sha2::Sha256Hasher::new()), + #[cfg(feature = "hash-sha1")] GeneralHashType::SHA1 => Box::new(sha1::Sha1Hasher::new()), + #[cfg(feature = "hash-xxh")] GeneralHashType::XXH64 => Box::new(xxh::Xxh64Hasher::new()), + #[cfg(feature = "hash-xxh")] GeneralHashType::XXH32 => Box::new(xxh::Xxh32Hasher::new()), GeneralHashType::NULL => Box::new(null::NullHasher::new()), } } } +impl GeneralHashType { + /// Returns the available hash types as a string. + /// + /// # Returns + /// The available hash types as a string. + /// + /// # Examples + /// ``` + /// use backup_deduplicator::hash::GeneralHashType; + /// + /// let supported = GeneralHashType::supported_algorithms(); + /// println!("Supported algorithms: {}", supported); + /// ``` + pub const fn supported_algorithms() -> &'static str { + const SHA2: &'static str = if cfg!(feature = "hash-sha2") { "SHA512, SHA256, " } else { "" }; + const SHA1: &'static str = if cfg!(feature = "hash-sha1") { "SHA1, " } else { "" }; + const XXH: &'static str = if cfg!(feature = "hash-xxh") { "XXH64, XXH32, " } else { "" }; + const NULL: &'static str = "NULL"; + + concatcp!(SHA2, SHA1, XXH, NULL) + } +} + impl FromStr for GeneralHashType { + /// Error type for parsing a `GeneralHashType` from a string. type Err = &'static str; + /// Parses a string into a `GeneralHashType`. + /// + /// # Arguments + /// * `s` - The string to parse. + /// + /// # Returns + /// The `GeneralHashType` that corresponds to the string or an error. 
+ /// + /// # Errors + /// Returns an error if the string does not correspond to a `GeneralHashType`. + /// Returns the available hash types in the error message. fn from_str(s: &str) -> Result { match s.to_uppercase().as_str() { + #[cfg(feature = "hash-sha2")] "SHA512" => Ok(GeneralHashType::SHA512), + #[cfg(feature = "hash-sha2")] "SHA256" => Ok(GeneralHashType::SHA256), + #[cfg(feature = "hash-sha1")] "SHA1" => Ok(GeneralHashType::SHA1), + #[cfg(feature = "hash-xxh")] "XXH64" => Ok(GeneralHashType::XXH64), + #[cfg(feature = "hash-xxh")] "XXH32" => Ok(GeneralHashType::XXH32), "NULL" => Ok(GeneralHashType::NULL), - _ => Err("SHA512, SHA256, SHA1, XXH64, XXH32, NULL"), + _ => Err(GeneralHashType::supported_algorithms()), } } } -impl fmt::Display for GeneralHashType { +impl Display for GeneralHashType { + /// Converts a `GeneralHashType` into a string. + /// + /// # Arguments + /// * `f` - The formatter to write to. + /// + /// # Returns + /// A result indicating whether the operation was successful. + /// + /// # Errors + /// Never fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + #[cfg(feature = "hash-sha2")] GeneralHashType::SHA512 => write!(f, "SHA512"), + #[cfg(feature = "hash-sha2")] GeneralHashType::SHA256 => write!(f, "SHA256"), + #[cfg(feature = "hash-sha1")] GeneralHashType::SHA1 => write!(f, "SHA1"), + #[cfg(feature = "hash-xxh")] GeneralHashType::XXH64 => write!(f, "XXH64"), + #[cfg(feature = "hash-xxh")] GeneralHashType::XXH32 => write!(f, "XXH32"), GeneralHashType::NULL => write!(f, "NULL"), } } } +/// `GeneralHash` is an enum that represents a hash value. +/// +/// The hash value is stored as a byte array of a fixed size. +/// The size of the byte array depends on the hash function used. +/// +/// The following hash functions are supported: SHA512, SHA256, SHA1, XXH64, XXH32, and NULL. +/// +/// The `hash_type` method returns the type of the hash function used. +/// The `hasher` method returns a new instance of a `GeneralHasher` trait object that corresponds to the hash type. +/// The `hasher` can then be used to compute a hash of that kind. +/// +/// # Traits +/// * `Display` - to allow formatting a `GeneralHash` into a string. +/// * `FromStr` - to allow parsing a string into a `GeneralHash`. +/// * `Serialize` - to allow serializing a `GeneralHash` into a string. +/// * `Deserialize` - to allow deserializing a `GeneralHash` from a string. +/// +/// # Examples +/// ``` +/// use std::str::FromStr; +/// use backup_deduplicator::hash::{GeneralHash, GeneralHashType}; +/// +/// #[cfg(feature = "hash-sha2")] +/// { +/// let hash = GeneralHash::from_str("SHA256:315f5bdb76d078c43b8ac0064e4a0164612b1fce77c869345bfc94c75894edd3").unwrap(); +/// +/// let mut hasher = GeneralHashType::SHA256.hasher(); +/// hasher.update(b"Hello, world!".as_slice()); +/// let new_hash = hasher.finalize(); +/// +/// assert_eq!(hash, new_hash); +/// assert_eq!(hash.to_string(), new_hash.to_string()); +/// } +/// ``` +/// +/// # See also +/// * [GeneralHashType] - representation of the different types of hash functions. +/// * [GeneralHasher] - trait for computing hash values. 
+/// #[derive(Debug, Hash, PartialEq, Eq, Clone, PartialOrd)] pub enum GeneralHash { + #[cfg(feature = "hash-sha2")] SHA512([u8; 64]), + #[cfg(feature = "hash-sha2")] SHA256([u8; 32]), + #[cfg(feature = "hash-sha1")] SHA1([u8; 20]), + #[cfg(feature = "hash-xxh")] XXH64([u8; 8]), + #[cfg(feature = "hash-xxh")] XXH32([u8; 4]), NULL, } impl Display for GeneralHash { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let capacity = match self { + #[cfg(feature = "hash-sha2")] GeneralHash::SHA512(_) => 128, + #[cfg(feature = "hash-sha2")] GeneralHash::SHA256(_) => 64, + #[cfg(feature = "hash-sha1")] GeneralHash::SHA1(_) => 40, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH64(_) => 16, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH32(_) => 8, GeneralHash::NULL => 0, }; @@ -83,18 +255,23 @@ impl Display for GeneralHash { hex.push_str((self.hash_type().to_string() + ":").as_str()); match self { + #[cfg(feature = "hash-sha2")] GeneralHash::SHA512(data) => for byte in data { hex.push_str(&format!("{:02x}", byte)); }, + #[cfg(feature = "hash-sha2")] GeneralHash::SHA256(data) => for byte in data { hex.push_str(&format!("{:02x}", byte)); }, + #[cfg(feature = "hash-sha1")] GeneralHash::SHA1(data) => for byte in data { hex.push_str(&format!("{:02x}", byte)); }, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH64(data) => for byte in data { hex.push_str(&format!("{:02x}", byte)); }, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH32(data) => for byte in data { hex.push_str(&format!("{:02x}", byte)); }, @@ -113,44 +290,69 @@ impl Serialize for GeneralHash { } } -impl<'de> Deserialize<'de> for GeneralHash { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de> { - let hex = String::deserialize(deserializer)?; +impl FromStr for GeneralHash { + // Error type for parsing a `GeneralHash` from a string. + type Err = &'static str; + + /// Parses a string into a `GeneralHash`. + /// + /// # Arguments + /// * `hex` - The string to parse, in the format `hash_type:hash_data (hex)`. + /// + /// # Returns + /// The `GeneralHash` that corresponds to the string or an error. + /// + /// # Errors + /// Returns an error if the string does not correspond to a `GeneralHash`. + /// * If the hash type is not recognized. + /// * If the hash data is not valid (wrong length or non-hex string). + fn from_str(hex: &str) -> Result { let mut iter = hex.split(':'); - let hash_type = GeneralHashType::from_str(iter.next().ok_or_else(|| D::Error::custom("No hash type"))?).map_err(|err| D::Error::custom(format!("Failed to parse hash type: {}", err)))?; - let data = iter.next().ok_or_else(|| D::Error::custom("No hash data"))?; - let data = utils::decode_hex(data).map_err(|err| D::Error::custom(format!("Failed to decode hash data: {}", err)))?; + let hash_type = GeneralHashType::from_str(iter.next().ok_or_else(|| "No hash type")?).map_err(|_| "Failed to parse hash type")?; + + #[cfg(any(feature = "hash-sha2", feature = "hash-sha1", feature = "hash-xxh"))] + let data = match hash_type { + GeneralHashType::NULL => Vec::new(), + _ => { + let data = iter.next().ok_or_else(|| "No hash data")?; + utils::decode_hex(data).map_err(|_| "Failed to decode hash data")? 
+ } + }; + let mut hash = GeneralHash::from_type(hash_type); match &mut hash { + #[cfg(feature = "hash-sha2")] GeneralHash::SHA512(target_data) => { if data.len() != 64 { - return Err(D::Error::custom("Invalid data length")); + return Err("Invalid data length"); } target_data.copy_from_slice(&data); }, + #[cfg(feature = "hash-sha2")] GeneralHash::SHA256(target_data) => { if data.len() != 32 { - return Err(D::Error::custom("Invalid data length")); + return Err("Invalid data length"); } target_data.copy_from_slice(&data); }, + #[cfg(feature = "hash-sha1")] GeneralHash::SHA1(target_data) => { if data.len() != 20 { - return Err(D::Error::custom("Invalid data length")); + return Err("Invalid data length"); } target_data.copy_from_slice(&data); }, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH64(target_data) => { if data.len() != 8 { - return Err(D::Error::custom("Invalid data length")); + return Err("Invalid data length"); } target_data.copy_from_slice(&data); }, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH32(target_data) => { if data.len() != 4 { - return Err(D::Error::custom("Invalid data length")); + return Err("Invalid data length"); } target_data.copy_from_slice(&data); }, @@ -160,58 +362,281 @@ impl<'de> Deserialize<'de> for GeneralHash { } } +impl<'de> Deserialize<'de> for GeneralHash { + /// Deserializes a `GeneralHash` from a string. + /// + /// # Arguments + /// * `deserializer` - The deserializer to use. + /// + /// # Returns + /// The deserialized `GeneralHash`. + /// + /// # Errors + /// If the string could not be deserialized. + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> { + let hex = String::deserialize(deserializer)?; + GeneralHash::from_str(hex.as_str()).map_err(D::Error::custom) + } +} + impl GeneralHash { + /// Returns the hash value as a byte array. + /// + /// # Returns + /// A reference to the byte array that represents the hash value. pub fn as_bytes(&self) -> &[u8] { match self { + #[cfg(feature = "hash-sha2")] GeneralHash::SHA512(data) => data, + #[cfg(feature = "hash-sha2")] GeneralHash::SHA256(data) => data, + #[cfg(feature = "hash-sha1")] GeneralHash::SHA1(data) => data, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH64(data) => data, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH32(data) => data, GeneralHash::NULL => &[0; 0], } } + #[cfg(feature = "hash-sha2")] + /// Returns a new instance of a SHA512 hash value. pub fn new_sha512() -> Self { Self::from_type(GeneralHashType::SHA512) } + + #[cfg(feature = "hash-sha2")] + /// Returns a new instance of a SHA256 hash value. pub fn new_sha256() -> Self { Self::from_type(GeneralHashType::SHA256) } + + #[cfg(feature = "hash-sha1")] + /// Returns a new instance of a SHA1 hash value. pub fn new_sha1() -> Self { Self::from_type(GeneralHashType::SHA1) } + + #[cfg(feature = "hash-xxh")] + /// Returns a new instance of a XXH64 hash value. pub fn new_xxh64() -> Self { Self::from_type(GeneralHashType::XXH64) } + + #[cfg(feature = "hash-xxh")] + /// Returns a new instance of a XXH32 hash value. pub fn new_xxh32() -> Self { Self::from_type(GeneralHashType::XXH32) } + /// Returns the type of the hash function used. + /// + /// # Returns + /// The type of the hash function used. 
+ /// + /// # Examples + /// ``` + /// use backup_deduplicator::hash::{GeneralHash, GeneralHashType}; + /// + /// #[cfg(feature = "hash-sha2")] + /// { + /// let hash = GeneralHash::new_sha256(); + // + // let m = match hash.hash_type() { + // GeneralHashType::SHA256 => true, + // _ => false, + // }; + // + // assert!(m); + /// } + /// ``` pub fn hash_type(&self) -> GeneralHashType { match self { + #[cfg(feature = "hash-sha2")] GeneralHash::SHA512(_) => GeneralHashType::SHA512, + #[cfg(feature = "hash-sha2")] GeneralHash::SHA256(_) => GeneralHashType::SHA256, + #[cfg(feature = "hash-sha1")] GeneralHash::SHA1(_) => GeneralHashType::SHA1, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH64(_) => GeneralHashType::XXH64, + #[cfg(feature = "hash-xxh")] GeneralHash::XXH32(_) => GeneralHashType::XXH32, GeneralHash::NULL => GeneralHashType::NULL, } } - + + /// Returns a new instance of a `GeneralHash` with the specified hash type. + /// + /// # Arguments + /// * `hash_type` - The type of the hash function to use. + /// + /// # Returns + /// A new instance of a `GeneralHash` with the specified hash type. pub fn from_type(hash_type: GeneralHashType) -> Self { match hash_type { + #[cfg(feature = "hash-sha2")] GeneralHashType::SHA512 => GeneralHash::SHA512([0; 64]), + #[cfg(feature = "hash-sha2")] GeneralHashType::SHA256 => GeneralHash::SHA256([0; 32]), + #[cfg(feature = "hash-sha1")] GeneralHashType::SHA1 => GeneralHash::SHA1([0; 20]), + #[cfg(feature = "hash-xxh")] GeneralHashType::XXH64 => GeneralHash::XXH64([0; 8]), + #[cfg(feature = "hash-xxh")] GeneralHashType::XXH32 => GeneralHash::XXH32([0; 4]), GeneralHashType::NULL => GeneralHash::NULL, } } + /// Returns a new instance of a `GeneralHash` with the specified hash type. + /// + /// # Arguments + /// * `hash_type` - The type of the hash function to use. + /// + /// # Returns + /// A new instance of a `GeneralHash` with the specified hash type. + /// + /// # See also + /// * [GeneralHashType] - representation of the different types of hash functions. pub fn hasher(&self) -> Box { self.hash_type().hasher() } + + /// Computes the hash value of the specified data. + /// + /// # Arguments + /// * `reader` - The data to hash (supplied as `std::io::Read`). + /// + /// # Returns + /// The size of the data that was hashed. + /// + /// # Errors + /// Returns an error if the data could not be read. + pub fn hash_file(&mut self, mut reader: T) -> anyhow::Result + where T: std::io::Read { + + let mut hasher = self.hasher(); + let mut buffer = [0; 4096]; + let mut content_size = 0; + + loop { + let bytes_read = reader.read(&mut buffer)?; + content_size += bytes_read as u64; + if bytes_read == 0 { + break; + } + hasher.update(&buffer[..bytes_read]); + } + + *self = hasher.finalize(); + + Ok(content_size) + } + + /// Computes the hash value of file iterator/directory. + /// + /// # Arguments + /// * `children` - The iterator of files to hash. + /// + /// # Returns + /// The count of files that were hashed. + /// + /// # Errors + /// Does not return an error. Might return an error in the future. + pub fn hash_directory<'a>(&mut self, children: impl Iterator) -> anyhow::Result { + let mut hasher = self.hasher(); + + let mut content_size = 0; + + for child in children { + content_size += 1; + hasher.update(child.get_content_hash().as_bytes()); + } + + *self = hasher.finalize(); + + Ok(content_size) + } + + /// Computes the hash value of the specified path. + /// + /// # Arguments + /// * `path` - The path to hash. + /// + /// # Returns + /// Does not return a value. 
+ /// + /// # Errors + /// Does not return an error. Might return an error in the future. + pub fn hash_path(&mut self, path: &Path) -> anyhow::Result<()> { + let mut hasher = self.hasher(); + + hasher.update(path.as_os_str().as_encoded_bytes()); + + *self = hasher.finalize(); + + Ok(()) + } + + /// Computes the hash value of the specified file path. + /// + /// # Arguments + /// * `path` - The file path to hash. + /// + /// # Returns + /// Does not return a value. + /// + /// # Errors + /// Does not return an error. Might return an error in the future. + pub fn hash_filepath(&mut self, path: &FilePath) -> anyhow::Result<()> { + let mut hasher = self.hasher(); + + for component in &path.path { + hasher.update(component.path.as_os_str().as_encoded_bytes()); + } + + *self = hasher.finalize(); + + Ok(()) + } } +/// `GeneralHasher` is a trait for computing hash values. +/// +/// # Methods +/// * `new` - creates a new instance of a `GeneralHasher`. +/// * `update` - updates the hash value with the specified data. +/// * `finalize` - finalizes the hash value and returns the result. +/// +/// # Examples +/// See the example in the `GeneralHash` documentation. +/// +/// # See also +/// * [GeneralHash] - representation of a hash value. +/// * [GeneralHashType] - representation of the different types of hash functions. pub trait GeneralHasher { + /// Creates a new instance of a `GeneralHasher`. + /// + /// # Returns + /// A new instance of a `GeneralHasher`. fn new() -> Self where Self: Sized; + + /// Updates the hash value with the specified data. + /// + /// # Arguments + /// * `data` - The data to hash. fn update(&mut self, data: &[u8]); + + /// Finalizes the hash value and returns the result. + /// Consumes the `GeneralHasher` instance. + /// + /// # Returns + /// The hash value. 
fn finalize(self: Box) -> GeneralHash; } +#[cfg(feature = "hash-sha1")] +/// `GeneralHasher` implementation for the SHA1 crate mod sha1; +#[cfg(feature = "hash-sha2")] +/// `GeneralHasher` implementation for the SHA2 crate mod sha2; +#[cfg(feature = "hash-xxh")] +/// `GeneralHasher` implementation for the XXH crate mod xxh; +/// `GeneralHasher` implementation for the NULL hash function mod null; diff --git a/src/data/hash/null.rs b/src/data/hash/null.rs index 2088528..5a427cc 100644 --- a/src/data/hash/null.rs +++ b/src/data/hash/null.rs @@ -1,4 +1,4 @@ -use crate::data::{GeneralHash, GeneralHasher}; +use crate::hash::{GeneralHash, GeneralHasher}; pub struct NullHasher { diff --git a/src/data/hash/sha1.rs b/src/data/hash/sha1.rs index 670bcb6..0f95caf 100644 --- a/src/data/hash/sha1.rs +++ b/src/data/hash/sha1.rs @@ -1,5 +1,5 @@ use sha1::Digest; -use crate::data::{GeneralHash, GeneralHasher}; +use crate::hash::{GeneralHash, GeneralHasher}; pub struct Sha1Hasher { hasher: sha1::Sha1 diff --git a/src/data/hash/sha2.rs b/src/data/hash/sha2.rs index bea1126..ca98309 100644 --- a/src/data/hash/sha2.rs +++ b/src/data/hash/sha2.rs @@ -1,5 +1,5 @@ use sha2::Digest; -use crate::data::{GeneralHash, GeneralHasher}; +use crate::hash::{GeneralHash, GeneralHasher}; pub struct Sha512Hasher { hasher: sha2::Sha512 diff --git a/src/data/hash/xxh.rs b/src/data/hash/xxh.rs index b97dbe4..dbc37a8 100644 --- a/src/data/hash/xxh.rs +++ b/src/data/hash/xxh.rs @@ -1,5 +1,5 @@ use xxhash_rust::{xxh32, xxh64}; -use crate::data::{GeneralHash, GeneralHasher}; +use crate::hash::{GeneralHash, GeneralHasher}; pub struct Xxh64Hasher { hasher: xxh64::Xxh64 diff --git a/src/data/hashtree_save_file.rs b/src/data/hashtree_save_file.rs deleted file mode 100644 index eeda980..0000000 --- a/src/data/hashtree_save_file.rs +++ /dev/null @@ -1,218 +0,0 @@ -use std::cell::RefCell; -use std::collections::HashMap; -use std::io::{BufRead, Write}; -use std::ops::DerefMut; -use std::sync::Arc; - -use anyhow::Result; -use log::{info, trace, warn}; -use serde::{Deserialize, Serialize}; - -use crate::data::{FilePath, GeneralHash, GeneralHashType}; -use crate::utils; - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub enum SaveFileVersion { - V1, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct SaveFileHeaders { - pub version: SaveFileVersion, - pub hash_type: GeneralHashType, - pub creation_date: u64, -} - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Hash, Eq)] -pub enum SaveFileEntryTypeV1 { - File, - Directory, - Symlink, - Other, -} -pub use SaveFileEntryTypeV1 as SaveFileEntryType; - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] -pub struct SaveFileEntryV1 { - pub file_type: SaveFileEntryTypeV1, - pub modified: u64, - pub size: u64, - pub hash: GeneralHash, - pub path: FilePath, - pub children: Vec, -} -pub use SaveFileEntryV1 as SaveFileEntry; - -#[derive(Debug, Serialize)] -pub struct SaveFileEntryV1Ref<'a> { - pub file_type: &'a SaveFileEntryTypeV1, - pub modified: &'a u64, - pub size: &'a u64, - pub hash: &'a GeneralHash, - pub path: &'a FilePath, - pub children: Vec<&'a GeneralHash>, -} -pub type SaveFileEntryRef<'a> = SaveFileEntryV1Ref<'a>; - -pub mod converter; - -pub struct SaveFile<'a, W, R> where W: Write, R: BufRead { - pub header: SaveFileHeaders, - pub file_by_hash: HashMap>>, - pub file_by_path: HashMap>, - pub all_entries: Vec>, - - enable_file_by_hash: bool, - enable_file_by_path: bool, - enable_all_entry_list: bool, - - writer: RefCell<&'a mut W>, - written_bytes: 
RefCell, - reader: RefCell<&'a mut R>, -} - -impl<'a, W: Write, R: BufRead> SaveFile<'a, W, R> { - pub fn new(writer: &'a mut W, reader: &'a mut R, enable_file_by_hash: bool, enable_file_by_path: bool, enable_all_entry_list: bool) -> Self { - let time = utils::get_time(); - SaveFile { - header: SaveFileHeaders { - version: SaveFileVersion::V1, - hash_type: GeneralHashType::SHA256, - creation_date: time, - }, - file_by_hash: HashMap::new(), - file_by_path: HashMap::new(), - all_entries: Vec::new(), - enable_file_by_hash, - enable_file_by_path, - enable_all_entry_list, - writer: RefCell::new(writer), - reader: RefCell::new(reader), - written_bytes: RefCell::new(0), - } - } - - pub fn save_header(&self) -> Result<()> { - let header_str = serde_json::to_string(&self.header)?; - *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(header_str.as_bytes())?; - *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(b"\n")?; - - Ok(()) - } - - pub fn load_header(&mut self) -> Result<()> { - let mut header_str = String::new(); - self.reader.borrow_mut().deref_mut().read_line(&mut header_str)?; - - let header: SaveFileHeaders = serde_json::from_str(header_str.as_str())?; - self.header = header; - - Ok(()) - } - - pub fn load_entry_no_filter(&mut self) -> Result>> { - self.load_entry(|_| true) - } - - pub fn load_entry bool>(&mut self, filter: F) -> Result>> { - loop { - let mut entry_str = String::new(); - let count = self.reader.borrow_mut().deref_mut().read_line(&mut entry_str)?; - - if count == 0 { - return Ok(None); - } - - if count == 1 { - continue; - } - - let entry: SaveFileEntry = serde_json::from_str(entry_str.as_str())?; - - if entry.hash.hash_type() != self.header.hash_type && !(entry.file_type == SaveFileEntryType::Other && entry.hash.hash_type() == GeneralHashType::NULL) { - warn!("Hash type mismatch ignoring entry: {:?}", entry.path); - continue; - } - - if !filter(&entry) { - trace!("Entry filtered: {:?}", entry.path); - continue; - } - - let shared_entry = Arc::new(entry); - - if self.enable_file_by_hash { - self.file_by_hash.entry(shared_entry.hash.clone()).or_insert_with(Vec::new).push(Arc::clone(&shared_entry)); - } - - if self.enable_file_by_path { - match self.file_by_path.insert(shared_entry.path.clone(), Arc::clone(&shared_entry)) { - None => {} - Some(old) => { - // this happens if analysis was canceled and continued - // and an already analysed file changed - info!("Duplicate entry for path: {:?}", &old.path); - if self.enable_all_entry_list { - self.all_entries.retain(|x| x != &old); - } - } - } - } - - if self.enable_all_entry_list { - self.all_entries.push(Arc::clone(&shared_entry)); - } - - return Ok(Some(shared_entry)) - } - } - - pub fn load_all_entries bool>(&mut self, filter: F) -> Result<()> { - while let Some(_) = self.load_entry(&filter)? 
{} - - Ok(()) - } - - pub fn load_all_entries_no_filter(&mut self) -> Result<()> { - self.load_all_entries(|_| true) - } - - pub fn write_entry(&self, result: &SaveFileEntryV1) -> Result<()> { - let string = serde_json::to_string(result)?; - *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(string.as_bytes())?; - *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write("\n".as_bytes())?; - self.writer.borrow_mut().deref_mut().flush()?; - Ok(()) - } - - pub fn write_entry_ref(&self, result: &SaveFileEntryV1Ref) -> Result<()> { - let string = serde_json::to_string(result)?; - *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(string.as_bytes())?; - *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write("\n".as_bytes())?; - self.writer.borrow_mut().deref_mut().flush()?; - Ok(()) - } - - pub fn empty_file_by_hash(&mut self) { - self.file_by_hash.clear(); - self.file_by_hash.shrink_to_fit(); - } - - pub fn empty_file_by_path(&mut self) { - self.file_by_path.clear(); - self.file_by_path.shrink_to_fit(); - } - - pub fn empty_entry_list(&mut self) { - self.all_entries.clear(); - self.all_entries.shrink_to_fit(); - } - - pub fn get_written_bytes(&self) -> usize { - *self.written_bytes.borrow() - } - - pub fn flush(&self) -> std::io::Result<()> { - self.writer.borrow_mut().deref_mut().flush() - } -} diff --git a/src/data/hashtree_save_file/converter.rs b/src/data/hashtree_save_file/converter.rs deleted file mode 100644 index 05b268d..0000000 --- a/src/data/hashtree_save_file/converter.rs +++ /dev/null @@ -1,176 +0,0 @@ -use crate::data::{DirectoryInformation, File, FileInformation, GeneralHash, OtherInformation, SaveFileEntryTypeV1, SaveFileEntryV1, SaveFileEntryV1Ref, StubInformation, SymlinkInformation}; - -impl From for SaveFileEntryV1 { - fn from(value: FileInformation) -> Self { - Self { - file_type: SaveFileEntryTypeV1::File, - modified: value.modified, - size: value.content_size, - hash: value.content_hash, - path: value.path, - children: Vec::with_capacity(0), - } - } -} - -impl From for SaveFileEntryV1 { - fn from(value: SymlinkInformation) -> Self { - Self { - file_type: SaveFileEntryTypeV1::Symlink, - modified: value.modified, - size: value.content_size, - hash: value.content_hash, - path: value.path, - children: Vec::with_capacity(0), - } - } -} - -impl From for SaveFileEntryV1 { - fn from(value: DirectoryInformation) -> Self { - let mut result = Self { - file_type: SaveFileEntryTypeV1::Directory, - modified: value.modified, - size: value.number_of_children, - hash: value.content_hash, - path: value.path, - children: Vec::with_capacity(value.children.len()), - }; - for child in value.children { - result.children.push(child.get_content_hash().clone()); - } - result - } -} - -impl From for SaveFileEntryV1 { - fn from(value: OtherInformation) -> Self { - Self { - file_type: SaveFileEntryTypeV1::Other, - modified: value.modified, - size: value.content_size, - hash: GeneralHash::NULL, - path: value.path, - children: Vec::with_capacity(0), - } - } -} - -impl From for SaveFileEntryV1 { - fn from(value: StubInformation) -> Self { - Self { - file_type: SaveFileEntryTypeV1::Other, - modified: 0, - size: 0, - hash: value.content_hash, - path: value.path, - children: Vec::with_capacity(0), - } - } -} - -impl<'a> From<&'a FileInformation> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a FileInformation) -> Self { - Self { - file_type: &SaveFileEntryTypeV1::File, - modified: &value.modified, - hash: 
&value.content_hash, - path: &value.path, - size: &value.content_size, - children: Vec::with_capacity(0), - } - } -} - -impl<'a> From<&'a SymlinkInformation> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a SymlinkInformation) -> Self { - Self { - file_type: &SaveFileEntryTypeV1::Symlink, - modified: &value.modified, - hash: &value.content_hash, - path: &value.path, - size: &value.content_size, - children: Vec::with_capacity(0), - } - } -} - -impl<'a> From<&'a DirectoryInformation> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a DirectoryInformation) -> Self { - let mut result = Self { - file_type: &SaveFileEntryTypeV1::Directory, - modified: &value.modified, - hash: &value.content_hash, - path: &value.path, - size: &value.number_of_children, - children: Vec::with_capacity(value.children.len()), - }; - for child in &value.children { - result.children.push(child.get_content_hash()); - } - result - } -} - -impl<'a> From<&'a OtherInformation> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a OtherInformation) -> Self { - Self { - file_type: &SaveFileEntryTypeV1::Other, - modified: &0, - hash: &GeneralHash::NULL, - path: &value.path, - size: &value.content_size, - children: Vec::with_capacity(0), - } - } -} - -impl<'a> From<&'a StubInformation> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a StubInformation) -> Self { - Self { - file_type: &SaveFileEntryTypeV1::Other, - modified: &0, - hash: &value.content_hash, - path: &value.path, - size: &0, - children: Vec::with_capacity(0), - } - } -} - -impl From for SaveFileEntryV1 { - fn from(value: File) -> Self { - match value { - File::File(info) => info.into(), - File::Directory(info) => info.into(), - File::Symlink(info) => info.into(), - File::Other(info) => info.into(), - File::Stub(info) => info.into(), - } - } -} - -impl<'a> From<&'a File> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a File) -> Self { - match value { - File::File(info) => info.into(), - File::Directory(info) => info.into(), - File::Symlink(info) => info.into(), - File::Other(info) => info.into(), - File::Stub(info) => info.into(), - } - } -} - -impl<'a> From<&'a SaveFileEntryV1> for SaveFileEntryV1Ref<'a> { - fn from(value: &'a SaveFileEntryV1) -> Self { - Self { - file_type: &value.file_type, - modified: &value.modified, - hash: &value.hash, - path: &value.path, - size: &value.size, - children: Vec::with_capacity(0), - } - } -} diff --git a/src/data/job.rs b/src/data/job.rs deleted file mode 100644 index 471a336..0000000 --- a/src/data/job.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::sync::{Arc, Mutex}; -use crate::data::{File, FilePath}; - -pub type SharedJob = Arc; - -static JOB_COUNTER: Mutex = Mutex::new(0); - -fn new_job_counter_id() -> usize { - let mut counter = JOB_COUNTER.lock().expect("Failed to lock job counter"); - *counter += 1; - (*counter).clone() -} - -#[derive(Debug, Clone, PartialEq, Copy)] -pub enum JobState { - NotProcessed, - Analyzed, -} - -#[derive(Debug)] -pub struct Job { - id: usize, - pub parent: Option, - pub finished_children: Mutex>, - pub target_path: FilePath, - pub state: JobState, -} - -impl Job { - pub fn new(parent: Option, target_path: FilePath) -> Self { - Job { - id: new_job_counter_id(), - parent, - target_path, - state: JobState::NotProcessed, - finished_children: Mutex::new(Vec::new()), - } - } - - pub fn job_id(&self) -> usize { - self.id - } - - pub(crate) fn new_job_id(mut self) -> Self { - self.id = new_job_counter_id(); - self - } -} - -impl JobTrait for Job { - fn job_id(&self) -> usize { - Job::job_id(self) - } -} - - 
-pub trait JobTrait { - fn job_id(&self) -> usize; -} - -pub trait ResultTrait {} diff --git a/src/data/path.rs b/src/data/path.rs index f0ceb80..ba63475 100644 --- a/src/data/path.rs +++ b/src/data/path.rs @@ -4,50 +4,100 @@ use std::path::PathBuf; use anyhow::{Result}; use serde::{Deserialize, Serialize}; +/// The type of archive. #[derive(Debug, Clone, Serialize, Deserialize, Hash)] pub enum ArchiveType { Tar, Zip, } +/// The target of a path. +/// +/// # Fields +/// * `File` - The path points to a file. +/// * `Archive` - The path points to an archive. That is further traversed. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub enum PathTarget { File, // Archive(ArchiveType), } +/// A path component. A path points to a file or an archive. +/// +/// # Fields +/// * `path` - The path. +/// * `target` - The target of the path. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)] pub struct PathComponent { pub path: PathBuf, pub target: PathTarget, } +/// A file path. A file path specifies a target file. It may consist of multiple path components. +/// Imagine the following file structure: +/// +/// ```text +/// DIR stuff +/// \-- DIR more_stuff +/// \-- FILE archive.tar.gz +/// \-- FILE file_in_archive.txt +/// ``` +/// +/// The file path to `file_in_archive.txt` would consist of the following path components: +/// - `stuff/more_stuff/archive.tar.gz` (target: Archive) +/// - `file_in_archive.txt` (target: File) +/// +/// The file path to `archive.tar.gz` would consist of the following path components: +/// - `stuff/more_stuff/archive.tar.gz` (target: File) +/// +/// # Fields +/// * `path` - The path components. +/// +/// # Examples +/// ``` +/// use std::path::PathBuf; +/// use backup_deduplicator::path::FilePath; +/// +/// let path = FilePath::from_realpath(PathBuf::from("test.txt")); +/// +/// ``` #[derive(Debug, Clone, Serialize, Deserialize, Hash)] pub struct FilePath { pub path: Vec } impl FilePath { - pub fn from_vec(path: Vec) -> Self { + /// Creates a new file path from path components. + /// + /// # Arguments + /// * `path` - The path components. + /// + /// # Returns + /// The file path. + pub fn from_pathcomponents(path: Vec) -> Self { FilePath { path } } - - pub fn from_path(path: PathBuf, target: PathTarget) -> Self { + + /// Creates a new file path from a real path. + /// + /// # Arguments + /// * `path` - The real path. + /// + /// # Returns + /// The file path. + pub fn from_realpath(path: PathBuf) -> Self { FilePath { path: vec![PathComponent { path, - target + target: PathTarget::File }] } } - pub fn join(&mut self, path: PathBuf, target: PathTarget) { - self.path.push(PathComponent { - path, - target - }); + pub fn join_realpath(&mut self, _path: PathBuf) { + todo!("implement") } pub fn extract_parent(&self, _temp_directory: &PathBuf) { @@ -57,7 +107,14 @@ impl FilePath { pub fn delete_parent(&self, _temp_directory: &PathBuf) { todo!("implement") } - + + /// Resolves the file path to a single file. + /// + /// # Returns + /// The resolved file path. + /// + /// # Errors + /// Never pub fn resolve_file(&self) -> Result { if self.path.len() == 1 { match self.path[0].target { @@ -68,12 +125,42 @@ impl FilePath { } } - pub fn child_real(&self, child_name: OsString) -> FilePath { + /// Gets the child of where the file path points to. + /// + /// # Arguments + /// * `child_name` - The name of the child. + /// + /// # Returns + /// The child file path. 
+ /// + /// # Example + /// ``` + /// use std::path::PathBuf; + /// use backup_deduplicator::path::FilePath; + /// + /// let path = FilePath::from_realpath(PathBuf::from("test/")); + /// let child = path.child("child.txt"); + /// + /// assert_eq!(child.path[0].path, PathBuf::from("test/child.txt")); + /// assert_eq!(child.path.len(), 1); + /// ``` + /// + /// ``` + /// use std::path::PathBuf; + /// use backup_deduplicator::path::FilePath; + /// + /// let path = FilePath::from_realpath(PathBuf::from("test/")); + /// let subpath = path.child("subdir").child("abc.txt"); + /// + /// assert_eq!(subpath.path[0].path, PathBuf::from("test/subdir/abc.txt")); + /// assert_eq!(subpath.path.len(), 1); + /// ``` + pub fn child>(&self, child_name: Str) -> FilePath { let mut result = FilePath { path: self.path.clone() }; - let component = PathBuf::from(child_name); + let component = PathBuf::from(child_name.into()); match result.path.last_mut() { Some(last) => { @@ -89,7 +176,27 @@ impl FilePath { return result; } - + + /// Gets the parent of the file path. + /// + /// # Returns + /// The parent file path. None if the file path has no parent. + /// + /// # Example + /// ``` + /// use std::path::PathBuf; + /// use backup_deduplicator::path::FilePath; + /// + /// let path = FilePath::from_realpath(PathBuf::from("test/abc/def.txt")); + /// let parent = path.parent().unwrap(); + /// + /// assert_eq!(parent.path[0].path, PathBuf::from("test/abc")); + /// + /// // test/abc test/ "" None + /// let root = path.parent().unwrap().parent().unwrap().parent().unwrap().parent(); + /// + /// assert_eq!(root, None); + /// ``` pub fn parent(&self) -> Option { let last = self.path.last(); @@ -126,6 +233,13 @@ impl FilePath { } impl PartialEq for FilePath { + /// Compares two file paths. + /// + /// # Arguments + /// * `other` - The other file path. + /// + /// # Returns + /// Whether the file paths are equal. fn eq(&self, other: &Self) -> bool { self.path.len() == other.path.len() && self.path.iter().zip(other.path.iter()).all(|(a, b)| a == b) } @@ -134,6 +248,7 @@ impl PartialEq for FilePath { impl Eq for FilePath {} impl std::fmt::Display for FilePath { + /// Formats the file path to a string. fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut result = String::new(); diff --git a/src/lib.md b/src/lib.md new file mode 100644 index 0000000..f778ae1 --- /dev/null +++ b/src/lib.md @@ -0,0 +1,107 @@ +# Inner-workings +The tool is run in four stages: +```plain + Input Execution Output +┌───────────┐ ┌┐ +│ HashTree ◄─────────┼┼────────┐ +│ │ ││ │ +│(optional) ├──┐ ┌────▼▼────┐ ┌┴────────────────┐ +└───────────┘ └─► │ │ │ + │ Build ├──► HashTree │ +┌───────────┐ ┌─► │ │ │ +│ Folder ├──┘ └────┬┬────┘ └┬────────────────┘ +│ -file │ ││ │ +│ -file │ ┌───────┼┼────────┘ +└───┬────┬──┘ │ ││ + │ │ │ ┌────▼▼────┐ ┌─────────────────┐ + │ │ │ │ │ │ │ + │ │ └──► Analyze ├──► Duplicate Sets │ + │ │ │ │ │ │ + │ │ └────┬┬────┘ └┬────────────────┘ + │ │ ││ │ Basic functionality complete +----│----│----┌───────┼┼────────┘---------------------------------- + │ │ │ ││ Implementation in progress + │ │ │ ┌────▼▼────┐ ┌─────────────────┐ + │ │ └──► │ │ │ + │ │ │ Dedup ├──► Change commands │ + │ └───────► │ │ │ + │ └────┬┬────┘ └┬────────────────┘ + │ ││ │ + │ ┌───────┼┼────────┘ + │ │ ││ + │ │ ┌────▼▼────┐ + │ └──► │ + │ │ Execute ├──►Deduplicated files + └────────────► │ + └──────────┘ +``` +1. **Build**: The tools reads a folder and builds a hash tree of all files in it. +2. 
**Analyze**: The tool analyzes the hash tree and finds duplicate files. +3. **Dedup**: The tool determines which steps to take to deduplicate the files. +This can be done in a semi-automatic or manual way. +4. **Execute**: The tool executes the deduplication steps (Deleting/Hardlinking/...). + +**Dedup** and **Execute** are in development and currently not (fully) implemented. + +## Build +* Input: Folder with files, Hashtree (optional) to update or continue from. +* Output: HashTree +* Execution: Fully automatic, no user interaction required, multithreaded. + +### HashTree file format +The HashTree is stored in a file with the following format: +```plain +HEADER [newline] +ENTRY [newline] +ENTRY [newline] +... +``` +See `HashTreeFileEntry` for the exact format of an entry. In short, it contains +all the information about an analyzed file or directory that is needed for later +stages (JSON): +* File path +* File type +* Last modified time +* File size +* Hash of the file +* Children hashes (if it is a directory) + +While analyzing, entries are only appended to the file. After the analysis is +done, the file is fed into the `clean` command, which removes all entries that +are outdated or no longer exist, rewriting the entire file (only ever shrinking it). + +The `clean` command can also be run manually. + +## Analyze +* Input: HashTree +* Output: Duplicate sets +* Execution: Fully automatic, no user interaction required, multithreaded file parsing, + single-threaded duplication detection. + +### Analysis results +The analysis results are stored in a file with the following format: +```plain +[ENTRY] [newline] +[ENTRY] [newline] +... +``` +See `ResultEntry` for the exact format of an entry. In short, it contains (JSON): +* File type +* Hash +* Size (0 if it is a directory, else the file size of one of the files) +* Conflicting Set (a set of all files that are duplicates of each other) + +## Dedup +* Input: Duplicate sets +* Output: Set of commands to execute to deduplicate the files +* Execution: Manual or semi-automatic, user interaction required. + +Implementation in progress. At present, the duplicate sets +must be processed manually. + +## Execute +* Input: Set of commands +* Output: Deduplicated files +* Execution: Fully automatic, user interaction only on errors. + +Implementation in progress. 
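To make the line-oriented formats described in `lib.md` concrete, here is a minimal sketch of streaming a hash tree file: one header line followed by one JSON entry per line. It is not part of this change set; the struct name, its field names, and the file name are illustrative assumptions only, since the canonical layout is whatever `HashTreeFileEntry` serializes to.

```rust
// Hypothetical reader for the hash tree format sketched above.
// Assumes serde/serde_json/anyhow, which the crate already depends on;
// `EntrySketch` and "backup.hashtree" are made-up names for illustration.
use std::fs::File;
use std::io::{BufRead, BufReader};

use serde::Deserialize;

#[derive(Debug, Deserialize)]
struct EntrySketch {
    path: serde_json::Value,          // file path (shape defined by `FilePath`)
    file_type: String,                // file type
    modified: u64,                    // last modified time
    size: u64,                        // file size
    hash: serde_json::Value,          // hash of the file content
    #[serde(default)]
    children: Vec<serde_json::Value>, // children hashes (directories only)
}

fn main() -> anyhow::Result<()> {
    let reader = BufReader::new(File::open("backup.hashtree")?);
    let mut lines = reader.lines();

    // First line: the header written by the build stage.
    if let Some(header) = lines.next().transpose()? {
        println!("header: {header}");
    }

    // Every following line is one appended JSON entry.
    for line in lines {
        let entry: EntrySketch = serde_json::from_str(&line?)?;
        println!("{} ({} bytes)", entry.file_type, entry.size);
    }
    Ok(())
}
```

The same pattern applies to the duplicate-set output of the analyze stage, which likewise writes one JSON entry per line (serialized from `DupSetEntryRef` on the writing side).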
diff --git a/src/lib.rs b/src/lib.rs index 707de38..cc836c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,32 +1,22 @@ +#![doc = include_str!("../README.md")] +#![doc = include_str!("lib.md")] + extern crate num_cpus; pub mod utils; -mod cmd { +pub mod pool; + +pub mod stages { pub mod build; - pub mod clean; pub mod analyze; -} -pub use cmd::*; - -pub mod data { - mod file; - pub use file::*; - mod fileid; - pub use fileid::*; - mod job; - pub use job::*; - mod path; - pub use path::*; - mod hash; - pub use hash::*; - - mod hashtree_save_file; - pub use hashtree_save_file::*; + pub mod clean; } -pub mod main { - pub mod utils; +mod data { + pub mod path; + pub mod hash; + pub mod fileid; } -pub mod threadpool; +pub use data::*; diff --git a/src/main.rs b/src/main.rs index d4433e4..5a290e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,11 +2,12 @@ use std::{env}; use std::str::FromStr; use clap::{arg, Parser, Subcommand}; use log::{debug, info, LevelFilter, trace}; -use backup_deduplicator::build::BuildSettings; -use backup_deduplicator::{analyze, clean, main}; -use backup_deduplicator::analyze::AnalysisSettings; -use backup_deduplicator::clean::CleanSettings; -use backup_deduplicator::data::GeneralHashType; +use backup_deduplicator::hash::GeneralHashType; +use backup_deduplicator::stages::analyze::cmd::AnalysisSettings; +use backup_deduplicator::stages::{analyze, build, clean}; +use backup_deduplicator::stages::build::cmd::BuildSettings; +use backup_deduplicator::stages::clean::cmd::CleanSettings; +use backup_deduplicator::utils; /// A simple command line tool to deduplicate backups. #[derive(Parser, Debug)] @@ -46,15 +47,15 @@ enum Command { output: String, /// Absolute paths, if set, the tool will output absolute paths in the hash tree. /// If not set, the tool will output relative paths to the current working directory. - #[arg(long)] - absolute_paths: bool, + // #[arg(long)] + // absolute_paths: bool, /// Working directory, if set, the tool will use the current working directory as the base for relative paths. #[arg(short, long)] working_directory: Option, /// Force overwrite, if set, the tool will overwrite the output file if it exists. 
If not set, the tool will continue an existing analysis #[arg(long="overwrite", default_value = "false")] recreate_output: bool, - /// Hash algorithm to use (values: sha256, sha512, sha1, xxh64, xxh32) + /// Hash algorithm to use #[arg(long="hash", default_value = "sha256")] hash_type: String, /// Disable database clean after run, if set the tool will not clean the database after the creation @@ -130,7 +131,7 @@ fn main() { // archives, follow_symlinks, output, - absolute_paths, + // absolute_paths, working_directory, recreate_output, hash_type, @@ -150,9 +151,9 @@ fn main() { // Convert to paths and check if they exist - let directory = main::utils::parse_path(directory.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting); - let output = main::utils::parse_path(output.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting); - let working_directory = working_directory.map(|w| main::utils::parse_path(w.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting)); + let directory = utils::main::parse_path(directory.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting); + let output = utils::main::parse_path(output.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting); + let working_directory = working_directory.map(|w| utils::main::parse_path(w.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting)); if !directory.exists() { eprintln!("Target directory does not exist: {}", directory.display()); @@ -175,7 +176,7 @@ fn main() { // Change working directory trace!("Changing working directory"); - let working_directory = main::utils::change_working_directory(working_directory); + let working_directory = utils::main::change_working_directory(working_directory); // Convert paths to relative path to working directory @@ -188,17 +189,17 @@ fn main() { // info!("Archives: {:?}", archives); info!("Follow symlinks: {:?}", follow_symlinks); info!("Output: {:?}", output); - info!("Absolute paths: {:?}", absolute_paths); + // info!("Absolute paths: {:?}", absolute_paths); info!("Working directory: {:?}", working_directory); // Run the command - match backup_deduplicator::build::run(BuildSettings { + match build::cmd::run(BuildSettings { directory: directory.to_path_buf(), //into_archives: archives, follow_symlinks, output: output.clone(), - absolute_paths, + // absolute_paths, threads: args.threads, continue_file: !recreate_output, hash_type @@ -208,7 +209,7 @@ fn main() { if !no_clean { info!("Executing clean command"); - match clean::run(CleanSettings { + match clean::cmd::run(CleanSettings { input: output.clone(), output: output, root: None, @@ -239,13 +240,13 @@ fn main() { working_directory, follow_symlinks } => { - let input = main::utils::parse_path(input.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting); - let output = main::utils::parse_path(output.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting); + let input = utils::main::parse_path(input.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting); + let output = utils::main::parse_path(output.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting); // Change working directory trace!("Changing working directory"); - main::utils::change_working_directory(working_directory.map(|w| main::utils::parse_path(w.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting))); + utils::main::change_working_directory(working_directory.map(|w| utils::main::parse_path(w.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting))); if !input.exists() { eprintln!("Input file does not exist: {:?}", input); @@ -257,7 
+258,7 @@ fn main() { std::process::exit(exitcode::CONFIG); } - match clean::run(CleanSettings { + match clean::cmd::run(CleanSettings { input, output, root, @@ -278,8 +279,8 @@ fn main() { output, overwrite } => { - let input = main::utils::parse_path(input.as_str(), main::utils::ParsePathKind::AbsoluteExisting); - let output = main::utils::parse_path(output.as_str(), main::utils::ParsePathKind::AbsoluteNonExisting); + let input = utils::main::parse_path(input.as_str(), utils::main::ParsePathKind::AbsoluteExisting); + let output = utils::main::parse_path(output.as_str(), utils::main::ParsePathKind::AbsoluteNonExisting); if !input.exists() { eprintln!("Input file does not exist: {:?}", input); @@ -291,7 +292,7 @@ fn main() { std::process::exit(exitcode::CONFIG); } - match analyze::run(AnalysisSettings { + match analyze::cmd::run(AnalysisSettings { input, output, threads: args.threads, diff --git a/src/main/utils.rs b/src/main/utils.rs deleted file mode 100644 index 5da5302..0000000 --- a/src/main/utils.rs +++ /dev/null @@ -1,61 +0,0 @@ -use std::env; -use std::path::PathBuf; -use crate::utils::LexicalAbsolute; - -pub fn change_working_directory(working_directory: Option) -> PathBuf { - match working_directory { - None => {}, - Some(working_directory) => { - env::set_current_dir(&working_directory).unwrap_or_else(|_| { - eprintln!("IO error, could not change working directory: {}", working_directory.display()); - std::process::exit(exitcode::CONFIG); - }); - } - } - - env::current_dir().unwrap_or_else(|_| { - eprintln!("IO error, could not resolve working directory"); - std::process::exit(exitcode::CONFIG); - }).canonicalize().unwrap_or_else(|_| { - eprintln!("IO error, could not resolve working directory"); - std::process::exit(exitcode::CONFIG); - }) -} - -#[derive(Debug, Clone, Copy)] -pub enum ParsePathKind { - Direct, - AbsoluteExisting, - AbsoluteNonExisting, -} - -pub fn parse_path(path: &str, kind: ParsePathKind) -> PathBuf { - let path = std::path::Path::new(path); - - let path = path.to_path_buf(); - - let path = match kind { - ParsePathKind::Direct => path, - ParsePathKind::AbsoluteExisting => to_lexical_absolute(path, true), - ParsePathKind::AbsoluteNonExisting => to_lexical_absolute(path, false), - }; - - path -} - -pub fn to_lexical_absolute(path: PathBuf, exists: bool) -> PathBuf { - let path = match exists { - true => path.canonicalize(), - false => path.to_lexical_absolute(), - }; - - let path = match path{ - Ok(out) => out, - Err(e) => { - eprintln!("IO error, could not resolve output file: {:?}", e); - std::process::exit(exitcode::CONFIG); - } - }; - - path -} diff --git a/src/pool.rs b/src/pool.rs new file mode 100644 index 0000000..68f4750 --- /dev/null +++ b/src/pool.rs @@ -0,0 +1,287 @@ +use std::sync::{Arc, mpsc, Mutex}; +use std::sync::mpsc::{Receiver, RecvTimeoutError, Sender}; +use std::thread; +use std::time::Duration; +use log::{debug, error, trace, warn}; + +/// A trait that must be implemented by a job type to be processed by the pool. +pub trait JobTrait { + /// Get the job id. + /// + /// # Returns + /// * `usize` - The job id. + fn job_id(&self) -> usize; +} + +/// A trait that must be implemented by a result type to be returned by the pool. +pub trait ResultTrait {} + +/// Worker entry function signature +/// The worker entry function is called by the worker thread to process a job. +/// A custom worker must supply a function of this type to the thread pool to process jobs. +/// +/// # Arguments +/// * `usize` - The current worker id. 
+/// * `Job` - The job received that should be processed. +/// * `&Sender` - A sender to publish job results. +/// * `&Sender` - A sender to publish new jobs to the thread pool. +/// * `&mut Argument` - A mutable reference to the arguments passed to the worker thread via the thread pool creation. +/// +/// # Returns +/// * `()` - The worker entry function should not return a value but instead should send the result via the `Sender` back to the main thread. +type WorkerEntry = fn(usize, Job, &Sender, &Sender, &mut Argument); + +/// Internal worker struct to manage the worker thread via the thread pool. +/// +/// # Fields +/// * `id` - The worker id. +/// * `thread` - The worker thread handle. +struct Worker +{ + id: usize, + thread: Option>, +} + +impl Worker { + /// Create a new worker thread. Starts the worker thread and returns the worker struct. + /// + /// # Arguments + /// * `id` - The worker id. + /// * `job_receive` - A receiver to receive jobs from the thread pool. + /// * `result_publish` - A sender to publish job results. + /// * `job_publish` - A sender to publish new jobs to the thread pool. + /// * `func` - The worker entry function to process jobs. + /// * `arg` - The arguments passed to the worker thread via the thread pool creation. + /// + /// # Returns + /// * `Worker` - The worker struct with the worker thread handle. + fn new(id: usize, job_receive: Arc>>, result_publish: Sender, job_publish: Sender, func: WorkerEntry, arg: Argument) -> Worker { + let thread = thread::spawn(move || { + Worker::worker_entry(id, job_receive, result_publish, job_publish, func, arg); + }); + + Worker { id, thread: Some(thread) } + } + + /// Function executed by the worker thread. Does exit when the job receiver is closed/the thread pool is shutting down. + /// + /// # Arguments + /// * `id` - The worker id. + /// * `job_receive` - A receiver to receive jobs from the thread pool. + /// * `result_publish` - A sender to publish job results. + /// * `job_publish` - A sender to publish new jobs to the thread pool. + /// * `func` - The worker entry function to process jobs. + /// * `arg` - The arguments passed to the worker thread via the thread pool creation. + fn worker_entry(id: usize, job_receive: Arc>>, result_publish: Sender, job_publish: Sender, func: WorkerEntry, mut arg: Argument) { + loop { + // Acquire the job lock + let job = job_receive.lock(); + + let job = match job { + Err(e) => { + error!("Worker {} shutting down {}", id, e); + break; + } + Ok(job) => { + job.recv() // receive new job + } + }; + + match job { + Err(_) => { + trace!("Worker {} shutting down", id); + break; + } + Ok(job) => { + trace!("Worker {} received job {}", id, job.job_id()); + // Call the user function to process the job + func(id, job, &result_publish, &job_publish, &mut arg); + } + } + } + } +} + +/// A thread pool to manage the distribution of jobs to worker threads. +/// +/// # Template Parameters +/// * `Job` - The job type that should be processed by the worker threads. +/// * `Result` - The result type that should be returned by the worker threads. +/// +/// Both `Job` and `Result` must implement the `Send` trait. +pub struct ThreadPool +where + Job: Send, + Result: Send, +{ + workers: Vec, + thread: Option>, + job_publish: Arc>>>, + result_receive: Receiver, +} + +impl ThreadPool { + /// Create a new thread pool with a given number of worker threads (args.len()). + /// Each worker thread will receive an argument from the args vector. 
When a new job + /// is published to the thread pool, the thread pool will distribute the job to the worker threads + /// and execute the `func` function within a worker thread. + /// + /// # Arguments + /// * `args` - A vector of arguments that should be passed to the worker threads. + /// * `func` - The worker entry function to process jobs. + /// + /// # Returns + /// * `ThreadPool` - The thread pool struct with the worker threads. + /// + /// # Template Parameters + /// * `Argument` - The argument type that should be passed to the worker threads. + /// The argument type must implement the `Send` trait. + pub fn new(mut args: Vec, func: WorkerEntry) -> ThreadPool { + assert!(args.len() > 0); + + let mut workers = Vec::with_capacity(args.len()); + + let (job_publish, job_receive) = mpsc::channel(); + + let job_receive = Arc::new(Mutex::new(job_receive)); + let (result_publish, result_receive) = mpsc::channel(); + let (thread_publish_job, thread_receive_job) = mpsc::channel(); + + let mut id = 0; + while let Some(arg) = args.pop() { + workers.push(Worker::new(id, Arc::clone(&job_receive), result_publish.clone(), thread_publish_job.clone(), func, arg)); + id += 1; + } + + let job_publish = Arc::new(Mutex::new(Some(job_publish))); + let job_publish_clone = Arc::clone(&job_publish); + + let thread = thread::spawn(move || { + ThreadPool::::pool_entry(job_publish_clone, thread_receive_job); + }); + + ThreadPool { + workers, + job_publish, + result_receive, + thread: Some(thread), + } + } + + /// Publish a new job to the thread pool. The job will be distributed to a worker thread. + /// + /// # Arguments + /// * `job` - The job that should be processed by a worker thread. + pub fn publish(&self, job: Job) { + let job_publish = self.job_publish.lock(); + match job_publish { + Err(e) => { + error!("ThreadPool is shutting down. Cannot publish job. {}", e); + } + Ok(job_publish) => { + match job_publish.as_ref() { + None => { + error!("ThreadPool is shutting down. Cannot publish job."); + } + Some(job_publish) => { + match job_publish.send(job) { + Err(e) => { + error!("Failed to publish job on thread pool. {}", e); + } + Ok(_) => {} + } + } + } + } + } + } + + /// Internal function that is run in a separate thread. It feeds back jobs from the worker threads to the input of the thread pool. + /// + /// # Arguments + /// * `job_publish` - A sender to publish new jobs to the thread pool. + /// * `job_receive` - A receiver to receive jobs from the worker threads. + fn pool_entry(job_publish: Arc>>>, job_receive: Receiver) { + loop { + let job = job_receive.recv(); + + match job { + Err(_) => { + trace!("Pool worker shutting down"); + break; + } + Ok(job) => { + match job_publish.lock() { + Err(e) => { + error!("Pool worker shutting down: {}", e); + break; + } + Ok(job_publish) => { + if let Some(job_publish) = job_publish.as_ref() { + job_publish.send(job).expect("Pool worker failed to send job. This should never fail."); + } + } + } + } + } + } + } + + /// Receive a result from the worker threads. This function will block until a result is available. + /// + /// # Returns + /// * `Result` - The result of a job processed by a worker thread. + /// + /// # Errors + /// * If all worker threads panicked, therefore the pipe is closed + pub fn receive(&self) -> std::result::Result { + self.result_receive.recv() + } + + /// Receive a result from the worker threads. This function will block until a result is available or a timeout occurs. 
+ /// + /// # Arguments + /// * `timeout` - The maximum time to wait for a result. + /// + /// # Returns + /// * `Result` - The result of a job processed by a worker thread. + /// + /// # Errors + /// * If all worker threads panicked, therefore the pipe is closed + /// * If the timeout occurs before a result is available + pub fn receive_timeout(&self, timeout: Duration) -> std::result::Result { + self.result_receive.recv_timeout(timeout) + } +} + +impl Drop for ThreadPool { + fn drop(&mut self) { + drop(self.job_publish.lock().expect("This should not break").take()); + + for worker in &mut self.workers { + debug!("Shutting down worker {}", worker.id); + + if let Some(thread) = worker.thread.take() { + match thread.join() { + Ok(_) => { + trace!("Worker {} shut down", worker.id); + } + Err(_) => { + warn!("Worker {} panicked", worker.id); + } + } + } + } + + if let Some(thread) = self.thread.take() { + match thread.join() { + Ok(_) => { + trace!("ThreadPool shut down"); + } + Err(_) => { + warn!("ThreadPool worker panicked"); + } + } + } + } +} diff --git a/src/stages/analyze.rs b/src/stages/analyze.rs new file mode 100644 index 0000000..462034e --- /dev/null +++ b/src/stages/analyze.rs @@ -0,0 +1,11 @@ + +pub mod output { + mod dupset_file; + + pub use dupset_file::*; +} + +pub mod cmd; +mod worker; + +pub mod intermediary_analysis_data; diff --git a/src/cmd/analyze.rs b/src/stages/analyze/cmd.rs similarity index 75% rename from src/cmd/analyze.rs rename to src/stages/analyze/cmd.rs index 8a374ad..4106bd1 100644 --- a/src/cmd/analyze.rs +++ b/src/stages/analyze/cmd.rs @@ -1,3 +1,4 @@ +use crate::stages::analyze::worker::AnalysisIntermediaryFile; use std::collections::HashMap; use std::fs; use std::io::Write; @@ -7,17 +8,40 @@ use std::sync::{Arc, Mutex}; use std::time::Duration; use anyhow::{anyhow, Result}; use log::{error, info, trace}; -use crate::analyze::analysis::{AnalysisFile, ResultEntryRef}; -use crate::analyze::worker::{AnalysisJob, AnalysisResult, MarkedIntermediaryFile, WorkerArgument}; -use crate::data::{GeneralHash, SaveFile, SaveFileEntry, SaveFileEntryType}; -use crate::threadpool::ThreadPool; +use crate::hash::{GeneralHash, GeneralHashType}; +use crate::pool::ThreadPool; +use crate::stages::analyze::intermediary_analysis_data::AnalysisFile; +use crate::stages::analyze::output::{DupSetEntryRef}; +use crate::stages::analyze::worker::{AnalysisJob, AnalysisResult, worker_run, AnalysisWorkerArgument}; +use crate::stages::build::output::{HashTreeFile, HashTreeFileEntry, HashTreeFileEntryType}; +use crate::utils::NullWriter; +/// The settings for the analysis cmd. +/// +/// # Fields +/// * `input` - The input file to analyze. +/// * `output` - The output file to write the results to. +/// * `threads` - The number of threads to use for the analysis. If None, the number of threads is equal to the number of CPUs. pub struct AnalysisSettings { pub input: PathBuf, pub output: PathBuf, pub threads: Option, } +/// Run the analysis cmd. +/// +/// # Arguments +/// * `analysis_settings` - The settings for the analysis cmd. +/// +/// # Returns +/// Nothing +/// +/// # Errors +/// * If the input file cannot be opened. +/// * If the output file cannot be opened. +/// * If the header of the input file cannot be loaded. +/// * If an error occurs while loading entries from the input file. +/// * If writing to the output file fails. 
pub fn run(analysis_settings: AnalysisSettings) -> Result<()> { let mut input_file_options = fs::File::options(); input_file_options.read(true); @@ -43,9 +67,10 @@ pub fn run(analysis_settings: AnalysisSettings) -> Result<()> { }; let mut input_buf_reader = std::io::BufReader::new(&input_file); + let mut null_out_writer = NullWriter::new(); let mut output_buf_writer = std::io::BufWriter::new(&output_file); - let mut save_file = SaveFile::new(&mut output_buf_writer, &mut input_buf_reader, true, true, true); + let mut save_file = HashTreeFile::new(&mut null_out_writer, &mut input_buf_reader, GeneralHashType::NULL, true, true, true); save_file.load_header()?; save_file.load_all_entries_no_filter()?; @@ -56,7 +81,7 @@ pub fn run(analysis_settings: AnalysisSettings) -> Result<()> { let mut all_files = save_file.all_entries; for (path, entry) in file_by_path.iter_mut() { - file_by_path_marked.insert(path.clone(), MarkedIntermediaryFile { + file_by_path_marked.insert(path.clone(), AnalysisIntermediaryFile { saved_file_entry: Arc::clone(entry), file: Arc::new(Mutex::new(None)), }); @@ -82,12 +107,12 @@ pub fn run(analysis_settings: AnalysisSettings) -> Result<()> { let mut args = Vec::with_capacity(analysis_settings.threads.unwrap_or_else(|| num_cpus::get())); for _ in 0..args.capacity() { - args.push(WorkerArgument { + args.push(AnalysisWorkerArgument { file_by_path: Arc::clone(&file_by_path) }); } - let pool: ThreadPool = ThreadPool::new(args, crate::cmd::analyze::worker::worker_run); + let pool: ThreadPool = ThreadPool::new(args, worker_run); for entry in &all_files { pool.publish(AnalysisJob::new(Arc::clone(entry))); @@ -171,12 +196,17 @@ pub fn run(analysis_settings: AnalysisSettings) -> Result<()> { Ok(()) } +/// Used to find duplicates of entries in the hash tree file. #[derive(Debug, PartialEq, Hash, Eq)] struct SetKey<'a> { size: u64, - ftype: &'a SaveFileEntryType, + ftype: &'a HashTreeFileEntryType, + children: &'a Vec, } -fn write_result_entry(file: &AnalysisFile, file_by_hash: &HashMap>>, output_buf_writer: &mut std::io::BufWriter<&fs::File>) -> u64 { +/// Write the result entry to the output file. Find all duplicates of the file and write them to the output file. +/// If called for every file, it will write all duplicates to the output file. +/// Writing each file only once +fn write_result_entry(file: &AnalysisFile, file_by_hash: &HashMap>>, output_buf_writer: &mut std::io::BufWriter<&fs::File>) -> u64 { let hash = match file { AnalysisFile::File(info) => &info.content_hash, AnalysisFile::Directory(info) => &info.content_hash, @@ -186,12 +216,13 @@ fn write_result_entry(file: &AnalysisFile, file_by_hash: &HashMap> = HashMap::new(); + let mut sets: HashMap> = HashMap::new(); for file in file_by_hash.get(hash).unwrap() { sets.entry(SetKey { size: file.size, - ftype: &file.file_type + ftype: &file.file_type, + children: &file.children, }).or_insert(Vec::new()).push(file); } @@ -212,7 +243,7 @@ fn write_result_entry(file: &AnalysisFile, file_by_hash: &HashMap &Mutex>> { + match self { + AnalysisFile::File(info) => &info.parent, + AnalysisFile::Directory(info) => &info.parent, + AnalysisFile::Symlink(info) => &info.parent, + AnalysisFile::Other(info) => &info.parent, + } + } + + /// Get the path of the file. + /// + /// # Returns + /// The path of the file. 
+ pub fn path(&self) -> &FilePath { + match self { + AnalysisFile::File(info) => &info.path, + AnalysisFile::Directory(info) => &info.path, + AnalysisFile::Symlink(info) => &info.path, + AnalysisFile::Other(info) => &info.path, + } + } +} + +/// File information part of [AnalysisFile]. +/// +/// # Fields +/// * `path` - The path of the file. +/// * `content_hash` - The hash of the file content. +/// * `parent` - The parent of the file. +#[derive(Debug, Serialize, Deserialize)] +pub struct AnalysisFileInformation { + pub path: FilePath, + pub content_hash: GeneralHash, + pub parent: Mutex>>, +} + +/// Directory information part of [AnalysisFile]. +/// +/// # Fields +/// * `path` - The path of the directory. +/// * `content_hash` - The hash of the directory content. +/// * `children` - The children of the directory. +/// * `parent` - The parent of the directory. +#[derive(Debug, Serialize, Deserialize)] +pub struct AnalysisDirectoryInformation { + pub path: FilePath, + pub content_hash: GeneralHash, + pub children: Mutex>>, + pub parent: Mutex>>, +} + +/// Symlink information part of [AnalysisFile]. +/// +/// # Fields +/// * `path` - The path of the symlink. +/// * `content_hash` - The hash of the symlink content. +/// * `parent` - The parent of the symlink. +#[derive(Debug, Serialize, Deserialize)] +pub struct AnalysisSymlinkInformation { + pub path: FilePath, + pub content_hash: GeneralHash, + pub parent: Mutex>>, +} + +/// Other information part of [AnalysisFile]. +/// +/// # Fields +/// * `path` - The path of the file. +/// * `parent` - The parent of the file. +#[derive(Debug, Serialize, Deserialize)] +pub struct AnalysisOtherInformation { + pub path: FilePath, + pub parent: Mutex>>, +} diff --git a/src/stages/analyze/output/dupset_file.rs b/src/stages/analyze/output/dupset_file.rs new file mode 100644 index 0000000..9cb0da6 --- /dev/null +++ b/src/stages/analyze/output/dupset_file.rs @@ -0,0 +1,19 @@ +use serde::{Serialize}; +use crate::hash::GeneralHash; +use crate::path::FilePath; +use crate::stages::build::output::HashTreeFileEntryType; + +/// The result of the analysis worker. A duplicate set entry. +/// +/// # Fields +/// * `ftype` - The type of the file. +/// * `size` - The size of the file. +/// * `hash` - The hash of the file content. +/// * `conflicting` - The conflicting files. +#[derive(Debug, Serialize)] +pub struct DupSetEntryRef<'a, 'b, 'c> { + pub ftype: &'a HashTreeFileEntryType, + pub size: u64, + pub hash: &'b GeneralHash, + pub conflicting: Vec<&'c FilePath>, +} diff --git a/src/cmd/analyze/worker.rs b/src/stages/analyze/worker.rs similarity index 54% rename from src/cmd/analyze/worker.rs rename to src/stages/analyze/worker.rs index 9a3432e..3d6fbc3 100644 --- a/src/cmd/analyze/worker.rs +++ b/src/stages/analyze/worker.rs @@ -3,27 +3,51 @@ use std::ops::Deref; use std::sync::{Arc, Mutex}; use std::sync::mpsc::Sender; use log::error; -use crate::data::{FilePath, JobTrait, ResultTrait, SaveFileEntry, SaveFileEntryType}; -use super::analysis::{DirectoryInformation, AnalysisFile, FileInformation, OtherInformation, SymlinkInformation}; +use crate::path::FilePath; +use crate::pool::{JobTrait, ResultTrait}; +use crate::stages::analyze::intermediary_analysis_data::{AnalysisFile, AnalysisDirectoryInformation, AnalysisFileInformation, AnalysisOtherInformation, AnalysisSymlinkInformation}; +use crate::stages::build::output::{HashTreeFileEntry, HashTreeFileEntryType}; +/// The intermediary file for the analysis worker. 
+/// +/// # Fields +/// * `saved_file_entry` - A saved file entry from the hash tree file. +/// * `file` - Analysis result of the file. Processed by a worker. #[derive(Debug)] -pub struct MarkedIntermediaryFile { - pub saved_file_entry: Arc, +pub struct AnalysisIntermediaryFile { + pub saved_file_entry: Arc, pub file: Arc>>>, } -pub struct WorkerArgument { - pub file_by_path: Arc>, +/// The argument for the analysis worker main thread. +/// Files from the hash tree file are stored in a hash map. +/// +/// # Fields +/// * `file_by_path` - A hash map of [FilePath] -> [AnalysisIntermediaryFile]. +pub struct AnalysisWorkerArgument { + pub file_by_path: Arc>, } +/// The job for the analysis worker. +/// +/// # Fields +/// * `id` - The id of the job. +/// * `file` - The file to analyze. #[derive(Debug)] pub struct AnalysisJob { id: usize, - pub file: Arc, + pub file: Arc, } impl AnalysisJob { - pub fn new(file: Arc) -> Self { + /// Create a new analysis job. + /// + /// # Arguments + /// * `file` - The file to analyze. + /// + /// # Returns + /// The analysis job. + pub fn new(file: Arc) -> Self { Self { id: new_job_counter_id(), file, @@ -31,6 +55,16 @@ impl AnalysisJob { } } +impl JobTrait for AnalysisJob { + /// Get the job id. + /// + /// # Returns + /// The job id. + fn job_id(&self) -> usize { + self.id + } +} + static JOB_COUNTER: Mutex = Mutex::new(0); fn new_job_counter_id() -> usize { @@ -39,24 +73,24 @@ fn new_job_counter_id() -> usize { (*counter).clone() } -impl JobTrait for AnalysisJob { - fn job_id(&self) -> usize { - self.id - } -} +/// The result for the analysis worker. #[derive(Debug)] -pub struct AnalysisResult { - -} - -impl ResultTrait for AnalysisResult { - -} +pub struct AnalysisResult {} +impl ResultTrait for AnalysisResult {} -fn parent_file<'a, 'b>(file: &'b MarkedIntermediaryFile, arg: &'a WorkerArgument) -> Option<(&'a Arc>>>, FilePath)> { +/// Get the parent file of a file. Searches the arg.cache for the parent file. +/// +/// # Arguments +/// * `file` - The file to get the parent of. +/// * `arg` - The argument for the worker thread. +/// +/// # Returns +/// The parent file and the parent path. +/// If the parent file is not present, return None. +fn parent_file<'a, 'b>(file: &'b AnalysisIntermediaryFile, arg: &'a AnalysisWorkerArgument) -> Option<(&'a Arc>>>, FilePath)> { match file.saved_file_entry.path.parent() { None => None, Some(parent_path) => { @@ -73,35 +107,42 @@ fn parent_file<'a, 'b>(file: &'b MarkedIntermediaryFile, arg: &'a WorkerArgument } } -fn recursive_process_file(path: &FilePath, arg: &WorkerArgument) { +/// Recursively process a file. Iterates over the file and its parent files until +/// the parent file is present or the root is reached. +/// +/// # Arguments +/// * `id` - The id of the worker. +/// * `path` - The path of the file to process. +/// * `arg` - The argument for the worker thread. 
+fn recursive_process_file(id: usize, path: &FilePath, arg: &AnalysisWorkerArgument) { let marked_file = arg.file_by_path.get(path); let mut attach_parent = None; if let Some(file) = marked_file { let result = match file.saved_file_entry.file_type { - SaveFileEntryType::File => { - AnalysisFile::File(FileInformation { + HashTreeFileEntryType::File => { + AnalysisFile::File(AnalysisFileInformation { path: file.saved_file_entry.path.clone(), content_hash: file.saved_file_entry.hash.clone(), parent: Mutex::new(None), }) }, - SaveFileEntryType::Symlink => { - AnalysisFile::Symlink(SymlinkInformation { + HashTreeFileEntryType::Symlink => { + AnalysisFile::Symlink(AnalysisSymlinkInformation { path: file.saved_file_entry.path.clone(), content_hash: file.saved_file_entry.hash.clone(), parent: Mutex::new(None), }) }, - SaveFileEntryType::Other => { - AnalysisFile::Other(OtherInformation { + HashTreeFileEntryType::Other => { + AnalysisFile::Other(AnalysisOtherInformation { path: file.saved_file_entry.path.clone(), parent: Mutex::new(None), }) }, - SaveFileEntryType::Directory => { - AnalysisFile::Directory(DirectoryInformation { + HashTreeFileEntryType::Directory => { + AnalysisFile::Directory(AnalysisDirectoryInformation { path: file.saved_file_entry.path.clone(), content_hash: file.saved_file_entry.hash.clone(), children: Mutex::new(Vec::new()), @@ -120,7 +161,7 @@ fn recursive_process_file(path: &FilePath, arg: &WorkerArgument) { } }, Err(err) => { - panic!("Failed to lock file: {}", err); + panic!("[{}] Failed to lock file: {}", id, err); } } @@ -130,17 +171,17 @@ fn recursive_process_file(path: &FilePath, arg: &WorkerArgument) { } if let Some((result, parent, parent_path)) = attach_parent { - match add_to_parent_as_child(parent, &result) { + match add_to_parent_as_child(id, parent, &result) { AddToParentResult::Ok => { return; }, AddToParentResult::ParentDoesNotExist => { // parent does not exist // create it - recursive_process_file(&parent_path, arg); + recursive_process_file(id, &parent_path, arg); // try to read to parent again - match add_to_parent_as_child(parent, &result) { + match add_to_parent_as_child(id, parent, &result) { AddToParentResult::Ok => { return; }, AddToParentResult::ParentDoesNotExist => { - error!("Parent still does not exist"); + error!("[{}] Parent still does not exist", id); return; }, AddToParentResult::Error => { @@ -155,13 +196,28 @@ fn recursive_process_file(path: &FilePath, arg: &WorkerArgument) { } } +/// The result of adding a file to a parent as child, see [add_to_parent_as_child] +/// +/// # Variants +/// * `Ok` - The operation was successful. +/// * `ParentDoesNotExist` - The parent does not exist. +/// * `Error` - An error occurred during the operation enum AddToParentResult { Ok, ParentDoesNotExist, Error, } -fn add_to_parent_as_child(parent: &Arc>>>, child: &Arc) -> AddToParentResult { +/// Add a file to a parent as a child. +/// +/// # Arguments +/// * `id` - The id of the worker. +/// * `parent` - The parent file. +/// * `child` - The child file. +/// +/// # Returns +/// The result of the operation. 
+fn add_to_parent_as_child(id: usize, parent: &Arc>>>, child: &Arc) -> AddToParentResult { match parent.lock() { Ok(guard) => { // exclusive access to parent file @@ -175,7 +231,7 @@ fn add_to_parent_as_child(parent: &Arc>>>, child: *guard = Some(Arc::downgrade(parent)); }, Err(err) => { - error!("Failed to lock parent: {}", err); + error!("[{}] Failed to lock parent: {}", id, err); return AddToParentResult::Error; } } @@ -189,13 +245,13 @@ fn add_to_parent_as_child(parent: &Arc>>>, child: AddToParentResult::Ok }, Err(err) => { - error!("Failed to lock children: {}", err); + error!("[{}] Failed to lock children: {}", id, err); AddToParentResult::Error } } }, _ => { - error!("Parent is not a directory"); + error!("[{}] Parent is not a directory", id); AddToParentResult::Error } } @@ -207,12 +263,15 @@ fn add_to_parent_as_child(parent: &Arc>>>, child: } }, Err(err) => { - error!("Failed to lock file: {}", err); + error!("[{}] Failed to lock file: {}", id, err); AddToParentResult::Error } } } -pub fn worker_run(_id: usize, job: AnalysisJob, _result_publish: &Sender, _job_publish: &Sender, arg: &mut WorkerArgument) { - recursive_process_file(&job.file.path, arg); +/// The main function for the analysis worker. +/// +/// # Arguments +pub fn worker_run(id: usize, job: AnalysisJob, _result_publish: &Sender, _job_publish: &Sender, arg: &mut AnalysisWorkerArgument) { + recursive_process_file(id, &job.file.path, arg); } diff --git a/src/stages/build.rs b/src/stages/build.rs new file mode 100644 index 0000000..00862f9 --- /dev/null +++ b/src/stages/build.rs @@ -0,0 +1,17 @@ + +pub mod output { + pub mod converter; + mod hashtreefile; + + pub use hashtreefile::*; +} + +pub mod cmd { + mod cmd; + pub mod job; + pub mod worker; + + pub use cmd::*; +} + +pub mod intermediary_build_data; diff --git a/src/cmd/build.rs b/src/stages/build/cmd/cmd.rs similarity index 63% rename from src/cmd/build.rs rename to src/stages/build/cmd/cmd.rs index e62b6f8..bba9f5e 100644 --- a/src/cmd/build.rs +++ b/src/stages/build/cmd/cmd.rs @@ -3,41 +3,46 @@ use std::fs; use std::path::{PathBuf}; use std::sync::Arc; use anyhow::{anyhow, Result}; -use serde::Serialize; -use crate::build::worker::{worker_run, WorkerArgument}; -use crate::data::{FilePath, GeneralHashType, Job, PathTarget, ResultTrait, File, SaveFile, SaveFileEntryRef, SaveFileEntry}; -use crate::threadpool::ThreadPool; - -mod worker; +use crate::hash::GeneralHashType; +use crate::path::{FilePath}; +use crate::pool::ThreadPool; +use crate::stages::build::cmd::job::{BuildJob, JobResult}; +use crate::stages::build::cmd::worker::{worker_run, WorkerArgument}; +use crate::stages::build::output::{HashTreeFile, HashTreeFileEntry, HashTreeFileEntryRef}; +/// The settings for the build command. +/// +/// # Fields +/// * `directory` - The directory to build. +/// * `follow_symlinks` - Whether to follow symlinks when traversing the file system. +/// * `output` - The output file to write the hash tree to. +/// * `threads` - The number of threads to use for building the hash tree. None = number of logical CPUs. +/// * `hash_type` - The hash algorithm to use for hashing files. +/// * `continue_file` - Whether to continue an existing hash tree file. 
pub struct BuildSettings { pub directory: PathBuf, // pub into_archives: bool, pub follow_symlinks: bool, pub output: PathBuf, - pub absolute_paths: bool, + // pub absolute_paths: bool, pub threads: Option, pub hash_type: GeneralHashType, pub continue_file: bool, } -#[derive(Debug, Serialize, Clone)] -struct JobResultContent { - already_cached: bool, - content: File, -} - -#[derive(Debug, Serialize, Clone)] -enum JobResult { - Final(JobResultContent), - Intermediate(JobResultContent), -} - -impl ResultTrait for JobResult { - -} - +/// Runs the build command. Hashes a directory and produces a hash tree file. +/// +/// # Arguments +/// * `build_settings` - The settings for the build command. +/// +/// # Returns +/// Nothing +/// +/// # Errors +/// * If the output file cannot be opened. +/// * If the header cannot be loaded from the output file (if the file is continued). +/// * If the output file cannot be written to. pub fn run( build_settings: BuildSettings, ) -> Result<()> { @@ -63,7 +68,7 @@ pub fn run( let mut result_in = std::io::BufReader::new(&result_file); let mut result_out = std::io::BufWriter::new(&result_file); - let mut save_file = SaveFile::new(&mut result_out, &mut result_in, false, true, false); + let mut save_file = HashTreeFile::new(&mut result_out, &mut result_in, build_settings.hash_type, false, true, false); match save_file.load_header() { Ok(_) => {}, Err(err) => { @@ -75,6 +80,7 @@ pub fn run( } } + // load all existing entries from the hash tree file match save_file.load_all_entries_no_filter() { Ok(_) => {}, Err(err) => { @@ -86,7 +92,7 @@ pub fn run( save_file.empty_file_by_hash(); save_file.empty_entry_list(); - let mut file_by_hash: HashMap = HashMap::with_capacity(save_file.file_by_hash.len()); + let mut file_by_hash: HashMap = HashMap::with_capacity(save_file.file_by_hash.len()); save_file.file_by_path.drain().for_each(|(k, v)| { file_by_hash.insert(k, Arc::into_inner(v).expect("There should be no further references to the entry")); }); @@ -103,10 +109,10 @@ pub fn run( }); } - let pool: ThreadPool = ThreadPool::new(args, worker_run); + let pool: ThreadPool = ThreadPool::new(args, worker_run); - let root_file = FilePath::from_path(build_settings.directory, PathTarget::File); - let root_job = Job::new(None, root_file); + let root_file = FilePath::from_realpath(build_settings.directory); + let root_job = BuildJob::new(None, root_file); pool.publish(root_job); @@ -124,7 +130,7 @@ pub fn run( }; if !result.already_cached { - let entry = SaveFileEntryRef::from(&result.content); + let entry = HashTreeFileEntryRef::from(&result.content); save_file.write_entry_ref(&entry)?; } diff --git a/src/stages/build/cmd/job.rs b/src/stages/build/cmd/job.rs new file mode 100644 index 0000000..4b9d7ce --- /dev/null +++ b/src/stages/build/cmd/job.rs @@ -0,0 +1,116 @@ +use std::sync::{Arc, Mutex}; +use serde::Serialize; +use crate::stages::build::intermediary_build_data::BuildFile; +use crate::path::FilePath; +use crate::pool::{JobTrait, ResultTrait}; + +pub type SharedBuildJob = Arc; + +static JOB_COUNTER: Mutex = Mutex::new(0); + +fn new_job_counter_id() -> usize { + let mut counter = JOB_COUNTER.lock().expect("Failed to lock job counter"); + *counter += 1; + (*counter).clone() +} + +/// The state of a build job. Used to track the state of a directory process job. +/// +/// # Fields +/// * `NotProcessed` - The job has not been processed yet. +/// * `Analyzed` - The directory has been expanded and can be analyzed further. 
+#[derive(Debug, Clone, PartialEq, Copy)] +pub enum BuildJobState { + NotProcessed, + Analyzed, +} + +/// A build job. Used to issue a job to hash a file/directory. +/// +/// # Fields +/// * `parent` - The parent job of this job. +/// * `finished_children` - The finished children of this job. +/// * `target_path` - The path of the file/directory to hash. +/// * `state` - The state of the job. +#[derive(Debug)] +pub struct BuildJob { + id: usize, + pub parent: Option, + pub finished_children: Mutex>, + pub target_path: FilePath, + pub state: BuildJobState, +} + +impl BuildJob { + /// Create a new build job. + /// + /// # Arguments + /// * `parent` - The parent job of this job. + /// * `target_path` - The path of the file/directory to hash. + /// + /// # Returns + /// The created build job. + pub fn new(parent: Option, target_path: FilePath) -> Self { + BuildJob { + id: new_job_counter_id(), + parent, + target_path, + state: BuildJobState::NotProcessed, + finished_children: Mutex::new(Vec::new()), + } + } + + /// Get the job id. + /// + /// # Returns + /// The job id. + pub fn job_id(&self) -> usize { + self.id + } + + /// Create and assign a new unique job id. + /// + /// # Returns + /// The build job with the new job id. + pub fn new_job_id(mut self) -> Self { + self.id = new_job_counter_id(); + self + } +} + +impl JobTrait for BuildJob { + /// Get the job id. + /// + /// # Returns + /// The job id. + fn job_id(&self) -> usize { + BuildJob::job_id(self) + } +} + +/// The result of a build job. +/// +/// # Fields +/// * `already_cached` - Whether the content was already cached. +/// * `content` - The content of the job result. +#[derive(Debug, Serialize, Clone)] +pub struct JobResultContent { + pub already_cached: bool, + pub content: BuildFile, +} + +/// A job result. +/// +/// # Fields +/// * `Final` - The final result of command. Returned if the job has no parent. +/// * `Intermediate` - An intermediate result of a command. Returned if the job has a parent. 
+#[derive(Debug, Serialize, Clone)] +pub enum JobResult { + Final(JobResultContent), + Intermediate(JobResultContent), +} + +impl ResultTrait for JobResult { + +} + diff --git a/src/cmd/build/worker.rs b/src/stages/build/cmd/worker.rs similarity index 58% rename from src/cmd/build/worker.rs rename to src/stages/build/cmd/worker.rs index ee0df35..e0bbcae 100644 --- a/src/cmd/build/worker.rs +++ b/src/stages/build/cmd/worker.rs @@ -1,3 +1,4 @@ +use crate::stages::build::intermediary_build_data::{BuildFile, BuildOtherInformation, BuildStubInformation}; use std::collections::HashMap; use std::fs; use std::sync::Arc; @@ -5,25 +6,41 @@ use std::sync::mpsc::Sender; use std::time::SystemTime; use anyhow::anyhow; use log::{error, info, trace, warn}; -use crate::build::{JobResult, JobResultContent}; -use crate::build::worker::directory::worker_run_directory; -use crate::build::worker::file::worker_run_file; -use crate::build::worker::other::worker_run_other; -use crate::build::worker::symlink::worker_run_symlink; -use crate::data::{File, FilePath, GeneralHashType, Job, OtherInformation, SaveFileEntry, StubInformation}; +use crate::hash::GeneralHashType; +use crate::path::FilePath; +use crate::stages::build::cmd::job::{BuildJob, JobResult, JobResultContent}; +use crate::stages::build::cmd::worker::directory::worker_run_directory; +use crate::stages::build::cmd::worker::file::worker_run_file; +use crate::stages::build::cmd::worker::other::worker_run_other; +use crate::stages::build::cmd::worker::symlink::worker_run_symlink; +use crate::stages::build::output::HashTreeFileEntry; mod directory; mod file; mod other; mod symlink; +/// The argument for the worker main thread. +/// +/// # Fields +/// * `follow_symlinks` - Whether to follow symlinks when traversing the file system. +/// * `hash_type` - The hash algorithm to use for hashing files. +/// * `save_file_by_path` - A hash map of [FilePath] -> [HashTreeFileEntry]. pub struct WorkerArgument { pub follow_symlinks: bool, pub hash_type: GeneralHashType, - pub save_file_by_path: Arc>, + pub save_file_by_path: Arc>, } -pub fn worker_run(id: usize, job: Job, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { +/// Main function for the worker thread. +/// +/// # Arguments +/// * `id` - The id of the worker. +/// * `job` - The job to process. +/// * `result_publish` - The channel to publish the result to. +/// * `job_publish` - The channel to publish new jobs to. +/// * `arg` - The argument for the worker thread. +pub fn worker_run(id: usize, job: BuildJob, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { let path = job.target_path.resolve_file(); let path = match path { Ok(file) => file, @@ -82,6 +99,11 @@ pub fn worker_run(id: usize, job: Job, result_publish: &Sender, job_p } } +/// Publish a result to the result channel. +/// Processes the error if the result could not be published. +/// +/// # Error +/// Never, issues a warning instead fn worker_publish_result(id: usize, result_publish: &Sender, result: JobResult) { match result_publish.send(result) { Ok(_) => {}, @@ -91,15 +113,34 @@ fn worker_publish_result(id: usize, result_publish: &Sender, result: } } -fn worker_create_error(path: FilePath, modified: u64, size: u64) -> File { - File::Other(OtherInformation { +/// Create a [File::Other] with the given information. +/// Used when an error occurs. +/// +/// # Arguments +/// * `path` - The path of the file. +/// * `modified` - The modified date of the file. +/// * `size` - The size of the file. 
+/// +/// # Returns +/// The created [File::Other]. +fn worker_create_error(path: FilePath, modified: u64, size: u64) -> BuildFile { + BuildFile::Other(BuildOtherInformation { path, modified, content_size: size, }) } -fn worker_publish_new_job(id: usize, job_publish: &Sender, job: Job) { +/// Publish a new job. +/// +/// # Arguments +/// * `id` - The id of the worker. +/// * `job_publish` - The channel to publish the job to. +/// * `job` - The job to publish. +/// +/// # Error +/// Never, issues a warning instead +fn worker_publish_new_job(id: usize, job_publish: &Sender, job: BuildJob) { match job_publish.send(job) { Ok(_) => {}, Err(e) => { @@ -108,7 +149,17 @@ fn worker_publish_new_job(id: usize, job_publish: &Sender, job: Job) { } } -fn worker_publish_result_or_trigger_parent(id: usize, cached: bool, result: File, job: Job, result_publish: &Sender, job_publish: &Sender, _arg: &mut WorkerArgument) { +/// Publish a result and trigger the parent job. +/// +/// # Arguments +/// * `id` - The id of the worker. +/// * `cached` - Whether the file is already cached. +/// * `result` - The result to publish. +/// * `job` - The job that was processed. +/// * `result_publish` - The channel to publish the result to. +/// * `job_publish` - The channel to publish new jobs to. +/// * `arg` - The argument for the worker thread. +fn worker_publish_result_or_trigger_parent(id: usize, cached: bool, result: BuildFile, job: BuildJob, result_publish: &Sender, job_publish: &Sender, _arg: &mut WorkerArgument) { let parent_job; let hash; @@ -127,7 +178,7 @@ fn worker_publish_result_or_trigger_parent(id: usize, cached: bool, result: File match parent_job.finished_children.lock() { Ok(mut finished) => { - finished.push(File::Stub(StubInformation { + finished.push(BuildFile::Stub(BuildStubInformation { path: job.target_path, content_hash: hash, })); @@ -149,6 +200,14 @@ fn worker_publish_result_or_trigger_parent(id: usize, cached: bool, result: File } } -fn worker_fetch_savedata<'a, 'b>(args: &'a WorkerArgument, path: &'b FilePath) -> Option<&'a SaveFileEntry> { +/// Fetch the saved data for a file. +/// +/// # Arguments +/// * `args` - The argument for the worker thread. +/// * `path` - The path of the file to fetch the saved data for. +/// +/// # Returns +/// The saved data for the file if it exists. 
+fn worker_fetch_savedata<'a, 'b>(args: &'a WorkerArgument, path: &'b FilePath) -> Option<&'a HashTreeFileEntry> { args.save_file_by_path.get(path) } diff --git a/src/cmd/build/worker/directory.rs b/src/stages/build/cmd/worker/directory.rs similarity index 75% rename from src/cmd/build/worker/directory.rs rename to src/stages/build/cmd/worker/directory.rs index e0830aa..397845e 100644 --- a/src/cmd/build/worker/directory.rs +++ b/src/stages/build/cmd/worker/directory.rs @@ -5,16 +5,28 @@ use std::path::PathBuf; use std::sync::Arc; use std::sync::mpsc::Sender; use log::{error, trace}; -use crate::build::JobResult; -use crate::build::worker::{worker_create_error, worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; -use crate::data::{DirectoryInformation, File, GeneralHash, Job, JobState, SaveFileEntryType}; -use crate::utils; - -pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, mut job: Job, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { +use crate::stages::build::intermediary_build_data::{BuildDirectoryInformation, BuildFile}; +use crate::hash::GeneralHash; +use crate::stages::build::cmd::job::{BuildJob, BuildJobState, JobResult}; +use crate::stages::build::cmd::worker::{worker_create_error, worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; +use crate::stages::build::output::HashTreeFileEntryType; + +/// Analyze a directory. +/// +/// # Arguments +/// * `path` - The path to the directory. +/// * `modified` - The last modified time of the directory. +/// * `size` - The size of the directory (given by fs::metadata). +/// * `id` - The id of the worker. +/// * `job` - The job to process. +/// * `result_publish` - The channel to publish the result to. +/// * `job_publish` - The channel to publish new jobs to. +/// * `arg` - The argument for the worker thread. 
+pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, mut job: BuildJob, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { trace!("[{}] analyzing directory {} > {:?}", id, &job.target_path, path); match job.state { - JobState::NotProcessed => { + BuildJobState::NotProcessed => { let read_dir = fs::read_dir(&path); let read_dir = match read_dir { Ok(read_dir) => read_dir, @@ -41,17 +53,17 @@ pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, let mut children = Vec::new(); for entry in read_dir { - let child_path = job.target_path.child_real(entry.file_name()); + let child_path = job.target_path.child(entry.file_name()); children.push(child_path); } - job.state = JobState::Analyzed; + job.state = BuildJobState::Analyzed; let parent_job = Arc::new(job); let mut jobs = Vec::with_capacity(children.len()); for child in children { - let job = Job::new(Some(Arc::clone(&parent_job)), child); + let job = BuildJob::new(Some(Arc::clone(&parent_job)), child); jobs.push(job); } @@ -66,7 +78,7 @@ pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, } } }, - JobState::Analyzed => { + BuildJobState::Analyzed => { let mut hash = GeneralHash::from_type(arg.hash_type); let mut children = Vec::new(); @@ -81,14 +93,14 @@ pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, // query cache match worker_fetch_savedata(arg, &job.target_path) { Some(found) => { - if found.file_type == SaveFileEntryType::Directory && found.modified == modified && found.size == finished.len() as u64 { + if found.file_type == HashTreeFileEntryType::Directory && found.modified == modified && found.size == finished.len() as u64 { if found.children.len() == finished.len() && found.children.iter().zip(finished.iter().map(|e| e.get_content_hash())).all(|(a, b)| a == b) { trace!("Directory {:?} is already in save file", path); let mut children = Vec::new(); children.append(finished.deref_mut()); - let file = File::Directory(DirectoryInformation { + let file = BuildFile::Directory(BuildDirectoryInformation { path: job.target_path.clone(), modified, content_hash: found.hash.clone(), @@ -104,7 +116,7 @@ pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, } if cached_entry.is_none() { - match utils::hash_directory(finished.iter(), &mut hash) { + match hash.hash_directory(finished.iter()) { Ok(_) => {}, Err(err) => { error = true; @@ -129,7 +141,7 @@ pub fn worker_run_directory(path: PathBuf, modified: u64, size: u64, id: usize, return; } - let file = File::Directory(DirectoryInformation { + let file = BuildFile::Directory(BuildDirectoryInformation { path: job.target_path.clone(), modified, content_hash: hash, diff --git a/src/cmd/build/worker/file.rs b/src/stages/build/cmd/worker/file.rs similarity index 64% rename from src/cmd/build/worker/file.rs rename to src/stages/build/cmd/worker/file.rs index a0c841c..0e1e777 100644 --- a/src/cmd/build/worker/file.rs +++ b/src/stages/build/cmd/worker/file.rs @@ -1,20 +1,33 @@ +use crate::stages::build::cmd::worker::GeneralHashType; +use crate::hash::GeneralHash; use std::fs; use std::path::PathBuf; use std::sync::mpsc::Sender; use log::{error, trace}; -use crate::build::JobResult; -use crate::build::worker::{worker_create_error, worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; -use crate::data::{GeneralHash, Job, GeneralHashType, File, FileInformation, SaveFileEntryType}; -use crate::utils; +use 
crate::stages::build::intermediary_build_data::{BuildFile, BuildFileInformation}; +use crate::stages::build::cmd::job::{BuildJob, JobResult}; +use crate::stages::build::cmd::worker::{worker_create_error, worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; +use crate::stages::build::output::HashTreeFileEntryType; -pub fn worker_run_file(path: PathBuf, modified: u64, size: u64, id: usize, job: Job, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { +/// Analyze a file. +/// +/// # Arguments +/// * `path` - The path to the file. +/// * `modified` - The last modified time of the file. +/// * `size` - The size of the file (given by fs::metadata). +/// * `id` - The id of the worker. +/// * `job` - The job to process. +/// * `result_publish` - The channel to publish the result to. +/// * `job_publish` - The channel to publish new jobs to. +/// * `arg` - The argument for the worker thread. +pub fn worker_run_file(path: PathBuf, modified: u64, size: u64, id: usize, job: BuildJob, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { trace!("[{}] analyzing file {} > {:?}", id, &job.target_path, path); match worker_fetch_savedata(arg, &job.target_path) { Some(found) => { - if found.file_type == SaveFileEntryType::File && found.modified == modified && found.size == size { + if found.file_type == HashTreeFileEntryType::File && found.modified == modified && found.size == size { trace!("File {:?} is already in save file", path); - worker_publish_result_or_trigger_parent(id, true, File::File(FileInformation { + worker_publish_result_or_trigger_parent(id, true, BuildFile::File(BuildFileInformation { path: job.target_path.clone(), modified, content_hash: found.hash.clone(), @@ -36,7 +49,7 @@ pub fn worker_run_file(path: PathBuf, modified: u64, size: u64, id: usize, job: // dont hash file content_size = fs::metadata(&path).map(|metadata| metadata.len()).unwrap_or(0); } else { - match utils::hash_file(&mut reader, &mut hash) { + match hash.hash_file(&mut reader) { Ok(size) => { content_size = size; } @@ -48,7 +61,7 @@ pub fn worker_run_file(path: PathBuf, modified: u64, size: u64, id: usize, job: } } - let file = File::File(FileInformation { + let file = BuildFile::File(BuildFileInformation { path: job.target_path.clone(), modified, content_hash: hash, diff --git a/src/stages/build/cmd/worker/other.rs b/src/stages/build/cmd/worker/other.rs new file mode 100644 index 0000000..113ad5c --- /dev/null +++ b/src/stages/build/cmd/worker/other.rs @@ -0,0 +1,45 @@ +use std::path::PathBuf; +use std::sync::mpsc::Sender; +use log::trace; +use crate::stages::build::intermediary_build_data::{BuildFile, BuildOtherInformation}; +use crate::stages::build::cmd::job::{BuildJob, JobResult}; +use crate::stages::build::cmd::worker::{worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; +use crate::stages::build::output::HashTreeFileEntryType; + +/// Analyze a file that is not a symlink/folder/file. +/// +/// # Arguments +/// * `path` - The path to the file. +/// * `modified` - The last modified time of the file. +/// * `size` - The size of the file (given by fs::metadata). +/// * `id` - The id of the worker. +/// * `job` - The job to process. +/// * `result_publish` - The channel to publish the result to. +/// * `job_publish` - The channel to publish new jobs to. +/// * `arg` - The argument for the worker thread. 
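Before hashing anything, every worker consults the entries loaded from a previous run (`worker_fetch_savedata`); if the recorded kind, modification time and size still match, the stored hash is reused and no file I/O happens. A std-only sketch of that lookup, with a hypothetical `CachedEntry` standing in for `HashTreeFileEntry`:

```rust
use std::collections::HashMap;
use std::path::{Path, PathBuf};

// Hypothetical stand-in for HashTreeFileEntry (only the fields used by the check).
struct CachedEntry { modified: u64, size: u64, hash: String }

/// Returns the cached hash if the on-disk metadata still matches the recorded entry.
fn lookup<'a>(cache: &'a HashMap<PathBuf, CachedEntry>, path: &Path, modified: u64, size: u64) -> Option<&'a str> {
    cache
        .get(path)
        .filter(|e| e.modified == modified && e.size == size)
        .map(|e| e.hash.as_str())
}

fn main() {
    let mut cache = HashMap::new();
    cache.insert(PathBuf::from("/tmp/a"), CachedEntry { modified: 10, size: 3, hash: "abc".into() });
    assert_eq!(lookup(&cache, Path::new("/tmp/a"), 10, 3), Some("abc"));
    assert_eq!(lookup(&cache, Path::new("/tmp/a"), 11, 3), None);
}
```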
+pub fn worker_run_other(path: PathBuf, modified: u64, size: u64, id: usize, job: BuildJob, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { + trace!("[{}] analyzing other {} > {:?}", id, &job.target_path, path); + + match worker_fetch_savedata(arg, &job.target_path) { + Some(found) => { + if found.file_type == HashTreeFileEntryType::Other && found.modified == modified && found.size == size { + trace!("Other {:?} is already in save file", path); + worker_publish_result_or_trigger_parent(id, true, BuildFile::Other(BuildOtherInformation { + path: job.target_path.clone(), + content_size: size, + modified, + }), job, result_publish, job_publish, arg); + return; + } + } + None => {} + } + + let file = BuildFile::Other(BuildOtherInformation { + path: job.target_path.clone(), + content_size: size, + modified, + }); + + worker_publish_result_or_trigger_parent(id, false, file, job, result_publish, job_publish, arg); +} \ No newline at end of file diff --git a/src/cmd/build/worker/symlink.rs b/src/stages/build/cmd/worker/symlink.rs similarity index 65% rename from src/cmd/build/worker/symlink.rs rename to src/stages/build/cmd/worker/symlink.rs index 7643179..1c312e6 100644 --- a/src/cmd/build/worker/symlink.rs +++ b/src/stages/build/cmd/worker/symlink.rs @@ -1,19 +1,31 @@ -use crate::data::{File, SaveFileEntryType, SymlinkInformation}; +use crate::stages::build::cmd::worker::BuildJob; use std::fs; use std::path::PathBuf; use std::sync::mpsc::Sender; use log::{error, trace}; -use crate::build::JobResult; -use crate::build::worker::{worker_create_error, worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; -use crate::data::{GeneralHash, Job}; -use crate::utils; +use crate::stages::build::intermediary_build_data::{BuildFile, BuildSymlinkInformation}; +use crate::hash::GeneralHash; +use crate::stages::build::cmd::job::JobResult; +use crate::stages::build::cmd::worker::{worker_create_error, worker_fetch_savedata, worker_publish_result_or_trigger_parent, WorkerArgument}; +use crate::stages::build::output::HashTreeFileEntryType; -pub fn worker_run_symlink(path: PathBuf, modified: u64, size: u64, id: usize, job: Job, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { +/// Analyze a symlink. +/// +/// # Arguments +/// * `path` - The path to the symlink. +/// * `modified` - The last modified time of the symlink. +/// * `size` - The size of the symlink (given by fs::metdata). +/// * `id` - The id of the worker. +/// * `job` - The job to process. +/// * `result_publish` - The channel to publish the result to. +/// * `job_publish` - The channel to publish new jobs to. +/// * `arg` - The argument for the worker thread. 
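When symlinks are not followed, the symlink's own hash is derived from the path it points to, read with `fs::read_link`. A small sketch of that case using the `sha2` crate (one of the project's optional hash backends); the helper name is made up for the example:

```rust
use std::fs;
use std::io;
use std::path::Path;
use sha2::{Digest, Sha256};

/// Hash the path a symlink points to (not the target's content),
/// mirroring the "not following symlinks" case described above.
fn hash_symlink_target(link: &Path) -> io::Result<Vec<u8>> {
    let target = fs::read_link(link)?;
    let mut hasher = Sha256::new();
    hasher.update(target.as_os_str().as_encoded_bytes());
    Ok(hasher.finalize().to_vec())
}

fn main() {
    // Example path only; prints nothing if the link does not exist.
    if let Ok(digest) = hash_symlink_target(Path::new("/tmp/example-link")) {
        println!("digest is {} bytes", digest.len());
    }
}
```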
+pub fn worker_run_symlink(path: PathBuf, modified: u64, size: u64, id: usize, job: BuildJob, result_publish: &Sender, job_publish: &Sender, arg: &mut WorkerArgument) { trace!("[{}] analyzing symlink {} > {:?}", id, &job.target_path, path); match worker_fetch_savedata(arg, &job.target_path) { Some(found) => { - if found.file_type == SaveFileEntryType::Symlink && found.modified == modified && found.size == size { + if found.file_type == HashTreeFileEntryType::Symlink && found.modified == modified && found.size == size { trace!("Symlink {:?} is already in save file", path); let target_link = fs::read_link(&path); let target_link = match target_link { @@ -24,7 +36,7 @@ pub fn worker_run_symlink(path: PathBuf, modified: u64, size: u64, id: usize, jo return; } }; - worker_publish_result_or_trigger_parent(id, true, File::Symlink(SymlinkInformation { + worker_publish_result_or_trigger_parent(id, true, BuildFile::Symlink(BuildSymlinkInformation { path: job.target_path.clone(), modified, content_hash: found.hash.clone(), @@ -49,7 +61,7 @@ pub fn worker_run_symlink(path: PathBuf, modified: u64, size: u64, id: usize, jo let mut hash = GeneralHash::from_type(arg.hash_type); - match utils::hash_path(&target_link, &mut hash) { + match hash.hash_path(&target_link) { Ok(_) => {}, Err(err) => { error!("Error while hashing symlink target {:?}: {}", target_link, err); @@ -58,7 +70,7 @@ pub fn worker_run_symlink(path: PathBuf, modified: u64, size: u64, id: usize, jo } } - let file = File::Symlink(SymlinkInformation { + let file = BuildFile::Symlink(BuildSymlinkInformation { path: job.target_path.clone(), modified, content_hash: hash, diff --git a/src/stages/build/intermediary_build_data.rs b/src/stages/build/intermediary_build_data.rs new file mode 100644 index 0000000..fbd047e --- /dev/null +++ b/src/stages/build/intermediary_build_data.rs @@ -0,0 +1,182 @@ +use std::path::{PathBuf}; +use serde::{Deserialize, Serialize}; +use crate::hash::GeneralHash; +use crate::path::FilePath; + +/// Information about an analyzed file. +/// +/// # Fields +/// * `path` - The path of the file. +/// * `modified` - The last modification time of the file. +/// * `content_hash` - The hash of the file content. +/// * `content_size` - The size of the file content. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildFileInformation { + pub path: FilePath, + pub modified: u64, + pub content_hash: GeneralHash, + pub content_size: u64, +} + +/// Information about an analyzed directory. +/// +/// # Fields +/// * `path` - The path of the directory. +/// * `modified` - The last modification time of the directory. +/// * `content_hash` - The hash of the directory content. +/// * `number_of_children` - The number of children in the directory. +/// * `children` - The children of the directory. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildDirectoryInformation { + pub path: FilePath, + pub modified: u64, + pub content_hash: GeneralHash, + pub number_of_children: u64, + pub children: Vec, +} + +/// Information about an analyzed symlink. +/// +/// # Fields +/// * `path` - The path of the symlink. +/// * `modified` - The last modification time of the symlink. +/// * `content_hash` - The hash of the symlink content. +/// * `target` - The target of the symlink. +/// * `content_size` - The size of the symlink content. 
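All of these intermediary records derive `Serialize`/`Deserialize` so they can be handed straight to `serde_json`. A reduced example of such a record and a one-line JSON round trip, assuming the serde and serde_json dependencies; the field types are simplified (the real structs use `FilePath` and `GeneralHash`):

```rust
use serde::{Deserialize, Serialize};
use std::path::PathBuf;

// Simplified stand-in for the symlink record.
#[derive(Debug, Serialize, Deserialize)]
struct SymlinkRecord {
    path: PathBuf,
    modified: u64,
    content_hash: String,
    target: PathBuf,
    content_size: u64,
}

fn main() -> serde_json::Result<()> {
    let rec = SymlinkRecord {
        path: PathBuf::from("backup/link"),
        modified: 1_700_000_000,
        content_hash: "deadbeef".into(),
        target: PathBuf::from("../real/file"),
        content_size: 12,
    };
    let line = serde_json::to_string(&rec)?; // one JSON object per line, as in the hash tree file
    let back: SymlinkRecord = serde_json::from_str(&line)?;
    assert_eq!(back.content_size, 12);
    Ok(())
}
```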
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildSymlinkInformation { + pub path: FilePath, + pub modified: u64, + pub content_hash: GeneralHash, // equal to the target file's hash or if not following symlinks, the symlink's path hashed + pub target: PathBuf, + pub content_size: u64, +} + +/// Information about an analyzed file that is not a regular file, directory, or symlink. +/// This could be sockets, block devices, character devices, etc. or file for which permissions are missing. +/// +/// # Fields +/// * `path` - The path of the file. +/// * `modified` - The last modification time of the file. +/// * `content_size` - The size of the file content. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildOtherInformation { + pub path: FilePath, + pub modified: u64, + pub content_size: u64, +} + +/// Information about a file that is not kept in memory but saved to disk. +/// +/// # Fields +/// * `path` - The path of the file. +/// * `content_hash` - The hash of the file content. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildStubInformation { + pub path: FilePath, + pub content_hash: GeneralHash, +} + +/// A file that has been analyzed. +/// +/// # Variants +/// * `File` - A regular file. +/// * `Directory` - A directory. +/// * `Symlink` - A symlink. +/// * `Other` - A file that is not a regular file, directory, or symlink, or a file for which permissions are missing. +/// * `Stub` - A file that is not kept in memory but already saved to disk in the hashtree file. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub enum BuildFile { + File(BuildFileInformation), + Directory(BuildDirectoryInformation), + Symlink(BuildSymlinkInformation), + Other(BuildOtherInformation), // for unsupported file types like block devices, character devices, etc., or files without permission + Stub(BuildStubInformation), // for files that are already analyzed +} + +// ---- IMPLEMENTATION ---- + +impl BuildFile { + /// Get the hash of a file + /// + /// # Returns + /// The hash of the file. If the file is of type `Other` the hash is [GeneralHash::NULL]. + pub fn get_content_hash(&self) -> &GeneralHash { + match self { + BuildFile::File(info) => &info.content_hash, + BuildFile::Directory(info) => &info.content_hash, + BuildFile::Symlink(info) => &info.content_hash, + BuildFile::Other(_) => &GeneralHash::NULL, + BuildFile::Stub(info) => &info.content_hash, + } + } + + /// Gets the path of this file + /// + /// # Returns + /// The path of the file. + pub fn get_path(&self) -> &FilePath { + match self { + BuildFile::File(info) => &info.path, + BuildFile::Directory(info) => &info.path, + BuildFile::Symlink(info) => &info.path, + BuildFile::Other(info) => &info.path, + BuildFile::Stub(info) => &info.path, + } + } + + /// Returns if this is a directory + /// + /// # Returns + /// True if this is a directory, false otherwise. + pub fn is_directory(&self) -> bool { + match self { + BuildFile::Directory(_) => true, + _ => false, + } + } + + /// Returns if this is a symlink + /// + /// # Returns + /// True if this is a symlink, false otherwise. + pub fn is_symlink(&self) -> bool { + match self { + BuildFile::Symlink(_) => true, + _ => false, + } + } + + /// Returns if this is a file + /// + /// # Returns + /// True if this is a file, false otherwise. + pub fn is_file(&self) -> bool { + match self { + BuildFile::File(_) => true, + _ => false, + } + } + + /// Returns if this is an "other" file + /// + /// # Returns + /// True if this is an "other" file, false otherwise. 
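The `is_*` helpers are simple shape checks on the enum; the same predicates can also be written with the `matches!` macro, shown here on a cut-down stand-in enum:

```rust
// Reduced stand-in for the BuildFile enum, just to show the predicate pattern.
enum Entry {
    File(String),
    Directory(Vec<Entry>),
    Other,
}

impl Entry {
    fn is_file(&self) -> bool {
        matches!(self, Entry::File(_))
    }
    fn is_directory(&self) -> bool {
        matches!(self, Entry::Directory(_))
    }
}

fn main() {
    assert!(Entry::File("a".into()).is_file());
    assert!(!Entry::Other.is_directory());
}
```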
+ pub fn is_other(&self) -> bool { + match self { + BuildFile::Other(_) => true, + _ => false, + } + } + + /// Returns if this is a stub file + /// + /// # Returns + /// True if this is a stub file, false otherwise. + pub fn is_stub(&self) -> bool { + match self { + BuildFile::Stub(_) => true, + _ => false, + } + } +} diff --git a/src/stages/build/output/converter.rs b/src/stages/build/output/converter.rs new file mode 100644 index 0000000..f6895e9 --- /dev/null +++ b/src/stages/build/output/converter.rs @@ -0,0 +1,269 @@ +use crate::stages::build::intermediary_build_data::{BuildDirectoryInformation, BuildFile, BuildFileInformation, BuildOtherInformation, BuildStubInformation, BuildSymlinkInformation}; +use crate::hash::GeneralHash; +use crate::stages::build::output::{HashTreeFileEntryType, HashTreeFileEntry, HashTreeFileEntryRef}; + +impl From for HashTreeFileEntry { + /// Convert a [BuildFileInformation] into a [HashTreeFileEntry]. + /// + /// # Arguments + /// * `value` - The [BuildFileInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntry]. + fn from(value: BuildFileInformation) -> Self { + Self { + file_type: HashTreeFileEntryType::File, + modified: value.modified, + size: value.content_size, + hash: value.content_hash, + path: value.path, + children: Vec::with_capacity(0), + } + } +} + +impl From for HashTreeFileEntry { + /// Convert a [BuildSymlinkInformation] into a [HashTreeFileEntry]. + /// + /// # Arguments + /// * `value` - The [BuildSymlinkInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntry]. + fn from(value: BuildSymlinkInformation) -> Self { + Self { + file_type: HashTreeFileEntryType::Symlink, + modified: value.modified, + size: value.content_size, + hash: value.content_hash, + path: value.path, + children: Vec::with_capacity(0), + } + } +} + +impl From for HashTreeFileEntry { + /// Convert a [BuildDirectoryInformation] into a [HashTreeFileEntry]. + /// + /// # Arguments + /// * `value` - The [BuildDirectoryInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntry]. + fn from(value: BuildDirectoryInformation) -> Self { + let mut result = Self { + file_type: HashTreeFileEntryType::Directory, + modified: value.modified, + size: value.number_of_children, + hash: value.content_hash, + path: value.path, + children: Vec::with_capacity(value.children.len()), + }; + for child in value.children { + result.children.push(child.get_content_hash().clone()); + } + result + } +} + +impl From for HashTreeFileEntry { + /// Convert a [BuildOtherInformation] into a [HashTreeFileEntry]. + /// + /// # Arguments + /// * `value` - The [BuildOtherInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntry]. + fn from(value: BuildOtherInformation) -> Self { + Self { + file_type: HashTreeFileEntryType::Other, + modified: value.modified, + size: value.content_size, + hash: GeneralHash::NULL, + path: value.path, + children: Vec::with_capacity(0), + } + } +} + +impl From for HashTreeFileEntry { + /// Convert a [BuildStubInformation] into a [HashTreeFileEntry]. + /// + /// # Arguments + /// * `value` - The [BuildStubInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntry]. 
+ fn from(value: BuildStubInformation) -> Self { + Self { + file_type: HashTreeFileEntryType::Other, + modified: 0, + size: 0, + hash: value.content_hash, + path: value.path, + children: Vec::with_capacity(0), + } + } +} + +impl<'a> From<&'a BuildFileInformation> for HashTreeFileEntryRef<'a> { + /// Convert a [BuildFileInformation] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [BuildFileInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a BuildFileInformation) -> Self { + Self { + file_type: &HashTreeFileEntryType::File, + modified: &value.modified, + hash: &value.content_hash, + path: &value.path, + size: &value.content_size, + children: Vec::with_capacity(0), + } + } +} + +impl<'a> From<&'a BuildSymlinkInformation> for HashTreeFileEntryRef<'a> { + /// Convert a [BuildSymlinkInformation] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [BuildSymlinkInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a BuildSymlinkInformation) -> Self { + Self { + file_type: &HashTreeFileEntryType::Symlink, + modified: &value.modified, + hash: &value.content_hash, + path: &value.path, + size: &value.content_size, + children: Vec::with_capacity(0), + } + } +} + +impl<'a> From<&'a BuildDirectoryInformation> for HashTreeFileEntryRef<'a> { + /// Convert a [BuildDirectoryInformation] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [BuildDirectoryInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a BuildDirectoryInformation) -> Self { + let mut result = Self { + file_type: &HashTreeFileEntryType::Directory, + modified: &value.modified, + hash: &value.content_hash, + path: &value.path, + size: &value.number_of_children, + children: Vec::with_capacity(value.children.len()), + }; + for child in &value.children { + result.children.push(child.get_content_hash()); + } + result + } +} + +impl<'a> From<&'a BuildOtherInformation> for HashTreeFileEntryRef<'a> { + /// Convert a [BuildOtherInformation] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [BuildOtherInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a BuildOtherInformation) -> Self { + Self { + file_type: &HashTreeFileEntryType::Other, + modified: &0, + hash: &GeneralHash::NULL, + path: &value.path, + size: &value.content_size, + children: Vec::with_capacity(0), + } + } +} + +impl<'a> From<&'a BuildStubInformation> for HashTreeFileEntryRef<'a> { + /// Convert a [BuildStubInformation] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [BuildStubInformation] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a BuildStubInformation) -> Self { + Self { + file_type: &HashTreeFileEntryType::Other, + modified: &0, + hash: &value.content_hash, + path: &value.path, + size: &0, + children: Vec::with_capacity(0), + } + } +} + +impl From for HashTreeFileEntry { + /// Convert a [BuildFile] into a [HashTreeFileEntry]. + /// + /// # Arguments + /// * `value` - The [BuildFile] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntry]. 
+ fn from(value: BuildFile) -> Self { + match value { + BuildFile::File(info) => info.into(), + BuildFile::Directory(info) => info.into(), + BuildFile::Symlink(info) => info.into(), + BuildFile::Other(info) => info.into(), + BuildFile::Stub(info) => info.into(), + } + } +} + +impl<'a> From<&'a BuildFile> for HashTreeFileEntryRef<'a> { + /// Convert a [BuildFile] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [BuildFile] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a BuildFile) -> Self { + match value { + BuildFile::File(info) => info.into(), + BuildFile::Directory(info) => info.into(), + BuildFile::Symlink(info) => info.into(), + BuildFile::Other(info) => info.into(), + BuildFile::Stub(info) => info.into(), + } + } +} + +impl<'a> From<&'a HashTreeFileEntry> for HashTreeFileEntryRef<'a> { + /// Convert a [HashTreeFileEntry] into a [HashTreeFileEntryRef]. + /// + /// # Arguments + /// * `value` - The reference to the [HashTreeFileEntry] to convert. + /// + /// # Returns + /// The converted [HashTreeFileEntryRef]. + fn from(value: &'a HashTreeFileEntry) -> Self { + Self { + file_type: &value.file_type, + modified: &value.modified, + hash: &value.hash, + path: &value.path, + size: &value.size, + children: Vec::with_capacity(0), + } + } +} diff --git a/src/stages/build/output/hashtreefile.rs b/src/stages/build/output/hashtreefile.rs new file mode 100644 index 0000000..7935a0a --- /dev/null +++ b/src/stages/build/output/hashtreefile.rs @@ -0,0 +1,337 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::io::{BufRead, Write}; +use std::ops::DerefMut; +use std::sync::Arc; + +use anyhow::Result; +use log::{info, trace, warn}; +use serde::{Deserialize, Serialize}; + +pub use HashTreeFileEntryTypeV1 as HashTreeFileEntryType; +pub use HashTreeFileEntryV1 as HashTreeFileEntry; +pub type HashTreeFileEntryRef<'a> = HashTreeFileEntryV1Ref<'a>; + +use crate::hash::{GeneralHash, GeneralHashType}; +use crate::path::FilePath; +use crate::utils; + +/// HashTreeFile file version. In further versions, the file format may change. +/// Currently only one file version exist. +/// +/// # Fields +/// * `V1` - Version 1 of the file format. +#[derive(Debug, Serialize, Deserialize, Clone)] +pub enum HashTreeFileVersion { + V1, +} + +/// HashTreeFile file header. First line of a hash tree file. +/// +/// # Fields +/// * `version` - The version of the file. +/// * `hash_type` - The hash type used to hash the files. +/// * `creation_date` - The creation date of the file in unix time +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct HashTreeFileHeader { + pub version: HashTreeFileVersion, + pub hash_type: GeneralHashType, + pub creation_date: u64, +} + +/// HashTreeFile entry type. Describes the type of file. +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Hash, Eq)] +pub enum HashTreeFileEntryTypeV1 { + File, + Directory, + Symlink, + Other, +} + +/// HashTreeFile entry. Describes an analyzed file. +/// +/// # Fields +/// * `file_type` - The type of the file. +/// * `modified` - The last modified date of the file in unix time. +/// * `size` - The size of the file in bytes for files, number of children for folders. +/// * `hash` - The hash of the file content. +/// * `path` - The path of the file. +/// * `children` - The children of the file. Only for directories. +/// +/// # See also +/// * [HashTreeFileEntryV1Ref] which is a reference version of this struct. 
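Keeping a borrowed twin of the entry struct lets results be serialized without cloning hashes and paths; only the owned form needs `Deserialize`. A reduced sketch of that owned/borrowed pair with a hypothetical field set, assuming serde and serde_json:

```rust
use serde::{Deserialize, Serialize};

// Owned form: read back from the file.
#[derive(Serialize, Deserialize)]
struct Entry {
    path: String,
    size: u64,
    hash: String,
}

// Borrowed twin: written without cloning the data it points to.
#[derive(Serialize)]
struct EntryRef<'a> {
    path: &'a str,
    size: &'a u64,
    hash: &'a str,
}

fn main() -> serde_json::Result<()> {
    let path = String::from("a/b.txt");
    let hash = String::from("deadbeef");
    let size = 42u64;
    let line = serde_json::to_string(&EntryRef { path: &path, size: &size, hash: &hash })?;
    let owned: Entry = serde_json::from_str(&line)?;
    assert_eq!(owned.size, 42);
    Ok(())
}
```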
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] +pub struct HashTreeFileEntryV1 { + pub file_type: HashTreeFileEntryTypeV1, + pub modified: u64, + pub size: u64, + pub hash: GeneralHash, + pub path: FilePath, + pub children: Vec, +} + +/// HashTreeFile entry reference. Describes an analyzed file. +/// This is a reference version of the [HashTreeFileEntryV1] struct. +/// +/// # Fields +/// * `file_type` - The type of the file. +/// * `modified` - The last modified date of the file in unix time. +/// * `size` - The size of the file in bytes for files, number of children for folders. +/// * `hash` - The hash of the file content. +/// * `path` - The path of the file. +/// * `children` - The children of the file. Only for directories. +/// +/// # See also +/// * [HashTreeFileEntryV1] which is the owned version of this struct. +#[derive(Debug, Serialize)] +pub struct HashTreeFileEntryV1Ref<'a> { + pub file_type: &'a HashTreeFileEntryTypeV1, + pub modified: &'a u64, + pub size: &'a u64, + pub hash: &'a GeneralHash, + pub path: &'a FilePath, + pub children: Vec<&'a GeneralHash>, +} + +/// Interface to access and manage a hash tree file. +/// +/// # Fields +/// * `header` - The header of the file. +/// * `file_by_hash` - A map of files by their hash. +/// * `file_by_path` - A map of files by their path. +/// * `all_entries` - A list of all entries. +pub struct HashTreeFile<'a, W, R> where W: Write, R: BufRead { + pub header: HashTreeFileHeader, + pub file_by_hash: HashMap>>, + pub file_by_path: HashMap>, + pub all_entries: Vec>, + + enable_file_by_hash: bool, + enable_file_by_path: bool, + enable_all_entry_list: bool, + + writer: RefCell<&'a mut W>, + written_bytes: RefCell, + reader: RefCell<&'a mut R>, +} + +impl<'a, W: Write, R: BufRead> HashTreeFile<'a, W, R> { + /// Create a new hash tree file. + /// + /// If not writing a new header hash_type can be set to GeneralHashType::NULL. + /// + /// # Arguments + /// * `writer` - The writer to write the file. + /// * `reader` - The reader to read the file. + /// * `hash_type` - The hash type used to hash the files. + /// * `enable_file_by_hash` - Whether to enable the file by hash - hash map. + /// * `enable_file_by_path` - Whether to enable the file by path - hash map. + /// * `enable_all_entry_list` - Whether to enable the all entries list. + /// + /// # Returns + /// The created hash tree file interface. 
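The hash tree file is newline-delimited JSON: one header object on the first line, then one entry object per line, which is why the loader can skip blank lines and filter entries while streaming. A self-contained sketch of that layout with simplified types, assuming serde, serde_json and anyhow (all already in the project's dependency list):

```rust
use serde::{Deserialize, Serialize};
use std::io::{BufRead, Write};

#[derive(Serialize, Deserialize)]
struct Header { version: u32, hash_type: String }

#[derive(Serialize, Deserialize, Debug)]
struct Entry { path: String, size: u64 }

fn main() -> anyhow::Result<()> {
    // Write: header first, then one JSON object per line.
    let mut buf = Vec::new();
    writeln!(buf, "{}", serde_json::to_string(&Header { version: 1, hash_type: "sha256".into() })?)?;
    writeln!(buf, "{}", serde_json::to_string(&Entry { path: "a".into(), size: 1 })?)?;
    writeln!(buf, "{}", serde_json::to_string(&Entry { path: "b".into(), size: 2 })?)?;

    // Read: header line first, then stream the entries one line at a time.
    let mut lines = buf.as_slice().lines();
    let header: Header = serde_json::from_str(&lines.next().unwrap()?)?;
    assert_eq!(header.version, 1);
    for line in lines {
        let entry: Entry = serde_json::from_str(&line?)?;
        println!("{:?}", entry);
    }
    Ok(())
}
```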
+ pub fn new(writer: &'a mut W, reader: &'a mut R, hash_type: GeneralHashType, enable_file_by_hash: bool, enable_file_by_path: bool, enable_all_entry_list: bool) -> Self { + let time = utils::get_time(); + HashTreeFile { + header: HashTreeFileHeader { + version: HashTreeFileVersion::V1, + hash_type, + creation_date: time, + }, + file_by_hash: HashMap::new(), + file_by_path: HashMap::new(), + all_entries: Vec::new(), + enable_file_by_hash, + enable_file_by_path, + enable_all_entry_list, + writer: RefCell::new(writer), + reader: RefCell::new(reader), + written_bytes: RefCell::new(0), + } + } + + /// Save the header to the file + /// + /// # Error + /// If writing to the file errors + pub fn save_header(&self) -> Result<()> { + let header_str = serde_json::to_string(&self.header)?; + *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(header_str.as_bytes())?; + *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(b"\n")?; + + Ok(()) + } + + /// Load a file header from the file + /// + /// # Error + /// If reading from the file errors + pub fn load_header(&mut self) -> Result<()> { + let mut header_str = String::new(); + self.reader.borrow_mut().deref_mut().read_line(&mut header_str)?; + + let header: HashTreeFileHeader = serde_json::from_str(header_str.as_str())?; + self.header = header; + + Ok(()) + } + + /// Load a file entry from the file + /// + /// # Error + /// If reading from the file errors + pub fn load_entry_no_filter(&mut self) -> Result>> { + self.load_entry(|_| true) + } + + /// Load a file entry from the file + /// + /// # Arguments + /// * `filter` - A filter function to filter the entries. If the function returns false the entry is ignored. + /// + /// # Returns + /// The loaded entry or None if the end of the file is reached. + /// + /// # Error + /// If reading from the file errors + pub fn load_entry bool>(&mut self, filter: F) -> Result>> { + loop { + let mut entry_str = String::new(); + let count = self.reader.borrow_mut().deref_mut().read_line(&mut entry_str)?; + + if count == 0 { + return Ok(None); + } + + if count == 1 { + continue; + } + + let entry: HashTreeFileEntry = serde_json::from_str(entry_str.as_str())?; + + if entry.hash.hash_type() != self.header.hash_type && !(entry.file_type == HashTreeFileEntryType::Other && entry.hash.hash_type() == GeneralHashType::NULL) { + warn!("Hash type mismatch ignoring entry: {:?}", entry.path); + continue; + } + + if !filter(&entry) { + trace!("Entry filtered: {:?}", entry.path); + continue; + } + + let shared_entry = Arc::new(entry); + + if self.enable_file_by_hash { + self.file_by_hash.entry(shared_entry.hash.clone()).or_insert_with(Vec::new).push(Arc::clone(&shared_entry)); + } + + if self.enable_file_by_path { + match self.file_by_path.insert(shared_entry.path.clone(), Arc::clone(&shared_entry)) { + None => {} + Some(old) => { + // this happens if analysis was canceled and continued + // and an already analysed file changed + info!("Duplicate entry for path: {:?}", &old.path); + if self.enable_all_entry_list { + self.all_entries.retain(|x| x != &old); + } + } + } + } + + if self.enable_all_entry_list { + self.all_entries.push(Arc::clone(&shared_entry)); + } + + return Ok(Some(shared_entry)) + } + } + + /// Load all entries from the file. Till the end of the file is reached. + /// + /// # Arguments + /// * `filter` - A filter function to filter the entries. If the function returns false the entry is ignored. 
+ /// + /// # Error + /// If reading from the file errors + pub fn load_all_entries bool>(&mut self, filter: F) -> Result<()> { + while let Some(_) = self.load_entry(&filter)? {} + + Ok(()) + } + + /// Load all entries from the file. Till the end of the file is reached. + /// + /// # Error + /// If reading from the file errors + pub fn load_all_entries_no_filter(&mut self) -> Result<()> { + self.load_all_entries(|_| true) + } + + /// Write an entry to the file + /// + /// # Arguments + /// * `result` - The entry to write. + /// + /// # Error + /// If writing to the file errors + pub fn write_entry(&self, result: &HashTreeFileEntry) -> Result<()> { + let string = serde_json::to_string(result)?; + *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(string.as_bytes())?; + *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write("\n".as_bytes())?; + self.writer.borrow_mut().deref_mut().flush()?; + Ok(()) + } + + /// Write an entry reference to the file + /// + /// # Arguments + /// * `result` - The entry reference to write. + /// + /// # Error + /// If writing to the file errors + pub fn write_entry_ref(&self, result: &HashTreeFileEntryRef) -> Result<()> { + let string = serde_json::to_string(result)?; + *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write(string.as_bytes())?; + *self.written_bytes.borrow_mut() += self.writer.borrow_mut().deref_mut().write("\n".as_bytes())?; + self.writer.borrow_mut().deref_mut().flush()?; + Ok(()) + } + + /// Empty the file by hash - hash map. + /// Frees/Shrinks the memory used. + pub fn empty_file_by_hash(&mut self) { + self.file_by_hash.clear(); + self.file_by_hash.shrink_to_fit(); + } + + /// Empty the file by path - hash map. + /// Frees/Shrinks the memory used. + pub fn empty_file_by_path(&mut self) { + self.file_by_path.clear(); + self.file_by_path.shrink_to_fit(); + } + + /// Empty the all entries list. + /// Frees/Shrinks the memory used. + pub fn empty_entry_list(&mut self) { + self.all_entries.clear(); + self.all_entries.shrink_to_fit(); + } + + /// Get the written bytes count. + /// + /// # Returns + /// The written bytes count. + pub fn get_written_bytes(&self) -> usize { + *self.written_bytes.borrow() + } + + /// Flush the writer. + /// + /// # Error + /// If flushing the writer errors + pub fn flush(&self) -> std::io::Result<()> { + self.writer.borrow_mut().deref_mut().flush() + } +} diff --git a/src/stages/clean.rs b/src/stages/clean.rs new file mode 100644 index 0000000..52958ec --- /dev/null +++ b/src/stages/clean.rs @@ -0,0 +1 @@ +pub mod cmd; diff --git a/src/cmd/clean.rs b/src/stages/clean/cmd.rs similarity index 73% rename from src/cmd/clean.rs rename to src/stages/clean/cmd.rs index d509d5e..c48641e 100644 --- a/src/cmd/clean.rs +++ b/src/stages/clean/cmd.rs @@ -2,8 +2,16 @@ use std::fs; use std::path::PathBuf; use anyhow::{anyhow, Result}; use log::{info, trace, warn}; -use crate::data::{SaveFile, SaveFileEntryType}; +use crate::hash::GeneralHashType; +use crate::stages::build::output::{HashTreeFile, HashTreeFileEntryType}; +/// Settings for the clean stage. +/// +/// # Fields +/// * `input` - The input hashtree file to clean. +/// * `output` - The output hashtree file to write the cleaned hashtree to. +/// * `root` - The root path of the original working directory. This is used to resolve relative paths. +/// * `follow_symlinks` - Whether to follow symlinks when checking if files exist. 
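The clean stage drops entries whose files have disappeared or changed kind on disk; `follow_symlinks` decides whether a symlink is inspected itself (`symlink_metadata`) or through its target (`metadata`). A std-only sketch of that check with a hypothetical `EntryType` in place of `HashTreeFileEntryType`:

```rust
use std::fs;
use std::path::Path;

// Hypothetical stand-in for HashTreeFileEntryType.
#[derive(PartialEq)]
enum EntryType { File, Directory, Symlink, Other }

/// Returns true when the path still exists and is still the recorded kind of file.
fn still_valid(path: &Path, recorded: &EntryType, follow_symlinks: bool) -> bool {
    let metadata = if follow_symlinks { fs::metadata(path) } else { fs::symlink_metadata(path) };
    match metadata {
        Err(_) => false, // vanished or unreadable: drop the entry
        Ok(meta) => {
            let actual = if meta.is_symlink() {
                EntryType::Symlink
            } else if meta.is_dir() {
                EntryType::Directory
            } else if meta.is_file() {
                EntryType::File
            } else {
                EntryType::Other
            };
            actual == *recorded
        }
    }
}

fn main() {
    println!("{}", still_valid(Path::new("/etc"), &EntryType::Directory, true));
}
```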
pub struct CleanSettings { pub input: PathBuf, pub output: PathBuf, @@ -11,6 +19,10 @@ pub struct CleanSettings { pub follow_symlinks: bool, } +/// Run the clean command. +/// +/// # Arguments +/// * `clean_settings` - The settings for the clean command. pub fn run( clean_settings: CleanSettings, ) -> Result<()> { @@ -39,7 +51,7 @@ pub fn run( let mut input_buf_reader = std::io::BufReader::new(&input_file); let mut output_buf_writer = std::io::BufWriter::new(&output_file); - let mut save_file = SaveFile::new(&mut output_buf_writer, &mut input_buf_reader, false, true, true); + let mut save_file = HashTreeFile::new(&mut output_buf_writer, &mut input_buf_reader, GeneralHashType::NULL, false, true, true); save_file.load_header()?; // remove duplicates, remove deleted files @@ -64,13 +76,13 @@ pub fn run( if let Some(metadata) = metadata { return if metadata.is_symlink() { - entry.file_type == SaveFileEntryType::Symlink + entry.file_type == HashTreeFileEntryType::Symlink } else if metadata.is_dir() { - entry.file_type == SaveFileEntryType::Directory + entry.file_type == HashTreeFileEntryType::Directory } else if metadata.is_file() { - entry.file_type == SaveFileEntryType::File + entry.file_type == HashTreeFileEntryType::File } else { - entry.file_type == SaveFileEntryType::Other + entry.file_type == HashTreeFileEntryType::Other } } diff --git a/src/threadpool.rs b/src/threadpool.rs deleted file mode 100644 index 0fa730b..0000000 --- a/src/threadpool.rs +++ /dev/null @@ -1,187 +0,0 @@ -use std::sync::{Arc, mpsc, Mutex}; -use std::sync::mpsc::{Receiver, RecvTimeoutError, Sender}; -use std::thread; -use std::time::Duration; -use log::{debug, error, trace, warn}; -use crate::data::{JobTrait, ResultTrait}; - -type WorkerEntry = fn(usize, Job, &Sender, &Sender, &mut Argument); - -struct Worker -{ - id: usize, - thread: Option>, -} - -impl Worker { - fn new(id: usize, job_receive: Arc>>, result_publish: Sender, job_publish: Sender, func: WorkerEntry, arg: Argument) -> Worker { - let thread = thread::spawn(move || { - Worker::worker_entry(id, job_receive, result_publish, job_publish, func, arg); - }); - - Worker { id, thread: Some(thread) } - } - - fn worker_entry(id: usize, job_receive: Arc>>, result_publish: Sender, job_publish: Sender, func: WorkerEntry, mut arg: Argument) { - loop { - let job = job_receive.lock(); - - let job = match job { - Err(e) => { - error!("Worker {} shutting down {}", id, e); - break; - } - Ok(job) => { - job.recv() - } - }; - - match job { - Err(_) => { - trace!("Worker {} shutting down", id); - break; - } - Ok(job) => { - trace!("Worker {} received job {}", id, job.job_id()); - func(id, job, &result_publish, &job_publish, &mut arg); - } - } - } - } -} - -pub struct ThreadPool -where - Job: Send, - Result: Send, -{ - workers: Vec, - thread: Option>, - job_publish: Arc>>>, - result_receive: Receiver, -} - -impl ThreadPool { - pub fn new(mut args: Vec, func: WorkerEntry) -> ThreadPool { - assert!(args.len() > 0); - - let mut workers = Vec::with_capacity(args.len()); - - let (job_publish, job_receive) = mpsc::channel(); - - let job_receive = Arc::new(Mutex::new(job_receive)); - let (result_publish, result_receive) = mpsc::channel(); - let (thread_publish_job, thread_receive_job) = mpsc::channel(); - - let mut id = 0; - while let Some(arg) = args.pop() { - workers.push(Worker::new(id, Arc::clone(&job_receive), result_publish.clone(), thread_publish_job.clone(), func, arg)); - id += 1; - } - - let job_publish = Arc::new(Mutex::new(Some(job_publish))); - let job_publish_clone = 
Arc::clone(&job_publish); - - let thread = thread::spawn(move || { - ThreadPool::::pool_entry(job_publish_clone, thread_receive_job); - }); - - ThreadPool { - workers, - job_publish, - result_receive, - thread: Some(thread), - } - } - - pub fn publish(&self, job: Job) { - let job_publish = self.job_publish.lock(); - match job_publish { - Err(e) => { - error!("ThreadPool is shutting down. Cannot publish job. {}", e); - } - Ok(job_publish) => { - match job_publish.as_ref() { - None => { - error!("ThreadPool is shutting down. Cannot publish job."); - } - Some(job_publish) => { - match job_publish.send(job) { - Err(e) => { - error!("Failed to publish job on thread pool. {}", e); - } - Ok(_) => {} - } - } - } - } - } - - } - - fn pool_entry(job_publish: Arc>>>, job_receive: Receiver) { - loop { - let job = job_receive.recv(); - - match job { - Err(_) => { - trace!("Pool worker shutting down"); - break; - } - Ok(job) => { - match job_publish.lock() { - Err(e) => { - error!("Pool worker shutting down: {}", e); - break; - } - Ok(job_publish) => { - if let Some(job_publish) = job_publish.as_ref() { - job_publish.send(job).expect("Pool worker failed to send job. This should never fail."); - } - } - } - } - } - } - } - - pub fn receive(&self) -> std::result::Result { - self.result_receive.recv() - } - - pub fn receive_timeout(&self, timeout: Duration) -> std::result::Result { - self.result_receive.recv_timeout(timeout) - } -} - -impl Drop for ThreadPool { - fn drop(&mut self) { - drop(self.job_publish.lock().expect("This should not break").take()); - - for worker in &mut self.workers { - debug!("Shutting down worker {}", worker.id); - - if let Some(thread) = worker.thread.take() { - match thread.join() { - Ok(_) => { - trace!("Worker {} shut down", worker.id); - } - Err(_) => { - warn!("Worker {} panicked", worker.id); - } - } - } - } - - if let Some(thread) = self.thread.take() { - match thread.join() { - Ok(_) => { - trace!("ThreadPool shut down"); - } - Err(_) => { - warn!("ThreadPool worker panicked"); - } - } - } - } -} \ No newline at end of file diff --git a/src/utils.rs b/src/utils.rs index d696083..15d0656 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,14 +1,43 @@ -use std::path::{Path, PathBuf}; +use std::io::Write; +use std::path::{PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; use anyhow::{anyhow, Result}; -use crate::data::{File, GeneralHash}; +/// Trait to convert a path to a lexical absolute path. +/// Does not require the path to exist. +/// +/// # See also +/// * +/// * [std::fs::canonicalize] pub trait LexicalAbsolute { + /// Convert a path to a lexical absolute path. + /// Does not require the path to exist. + /// + /// # Errors + /// Returns an error if the absolute path could not be determined. fn to_lexical_absolute(&self) -> std::io::Result; } impl LexicalAbsolute for PathBuf { + /// Convert a path to a lexical absolute path. + /// Does not require the path to exist. + /// + /// # Example + /// ``` + /// use std::path::PathBuf; + /// use backup_deduplicator::utils::LexicalAbsolute; + /// + /// let path = PathBuf::from("/a/b/../c"); + /// let absolute = path.to_lexical_absolute().unwrap(); + /// assert_eq!(absolute, PathBuf::from("/a/c")); + /// ``` + /// + /// # Errors + /// Returns an error if the given path is relative and the current working directory could not be determined. + /// * The working directory does not exist. + /// * Insufficient permissions to determine the working directory. 
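`to_lexical_absolute` normalizes `.` and `..` purely on the path components, without touching the filesystem the way `canonicalize` does; only relative inputs need the current working directory. A sketch of that component walk, matching the behaviour in the doc example above:

```rust
use std::env;
use std::io;
use std::path::{Component, Path, PathBuf};

/// Resolve `.` and `..` lexically; the path itself does not have to exist.
fn lexical_absolute(path: &Path) -> io::Result<PathBuf> {
    let mut absolute = if path.is_absolute() { PathBuf::new() } else { env::current_dir()? };
    for component in path.components() {
        match component {
            Component::CurDir => {}                      // skip "."
            Component::ParentDir => { absolute.pop(); }  // drop the last segment for ".."
            other => absolute.push(other.as_os_str()),   // keep prefix, root and normal segments
        }
    }
    Ok(absolute)
}

fn main() -> io::Result<()> {
    assert_eq!(lexical_absolute(Path::new("/a/b/../c"))?, PathBuf::from("/a/c"));
    Ok(())
}
```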
fn to_lexical_absolute(&self) -> std::io::Result { + // https://internals.rust-lang.org/t/path-to-lexical-absolute/14940 let mut absolute = if self.is_absolute() { PathBuf::new() } else { @@ -25,52 +54,18 @@ impl LexicalAbsolute for PathBuf { } } -pub fn hash_file(mut reader: T, hash: &mut GeneralHash) -> Result -where T: std::io::Read { - - let mut hasher = hash.hasher(); - let mut buffer = [0; 4096]; - let mut content_size = 0; - - loop { - let bytes_read = reader.read(&mut buffer)?; - content_size += bytes_read as u64; - if bytes_read == 0 { - break; - } - hasher.update(&buffer[..bytes_read]); - } - - *hash = hasher.finalize(); - - Ok(content_size) -} - -pub fn hash_directory<'a>(children: impl Iterator, hash: &mut GeneralHash) -> Result { - let mut hasher = hash.hasher(); - - let mut content_size = 0; - - for child in children { - content_size += 1; - hasher.update(child.get_content_hash().as_bytes()); - } - - *hash = hasher.finalize(); - - Ok(content_size) -} - -pub fn hash_path(path: &Path, hash: &mut GeneralHash) -> Result<()> { - let mut hasher = hash.hasher(); - - hasher.update(path.as_os_str().as_encoded_bytes()); - - *hash = hasher.finalize(); - - Ok(()) -} - +/// Decode a hex string to a byte vector. +/// +/// # Example +/// ``` +/// use backup_deduplicator::utils::decode_hex; +/// +/// let bytes = decode_hex("deadbeef").unwrap(); +/// assert_eq!(bytes, vec![0xde, 0xad, 0xbe, 0xef]); +/// ``` +/// +/// # Errors +/// Returns an error if the given string is not a valid hex string. pub fn decode_hex(s: &str) -> Result> { if s.len() % 2 != 0 { return Err(anyhow!("Invalid hex length")); @@ -82,8 +77,154 @@ pub fn decode_hex(s: &str) -> Result> { .collect() } +/// Get the current time in seconds since the Unix epoch (in seconds). +/// +/// # Returns +/// The current time in seconds since the Unix epoch. Returns 0 if the current time is before the Unix epoch. pub fn get_time() -> u64 { SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_secs()).unwrap_or(0) } + +/// A writer that discards all data. +/// +/// # Example +/// ``` +/// use std::io::Write; +/// +/// let mut writer = backup_deduplicator::utils::NullWriter::new(); +/// writer.write(b"Hello, world!").unwrap(); +/// ``` +pub struct NullWriter {} + +impl NullWriter { + /// Create a new NullWriter. + /// + /// # Returns + /// A new NullWriter. + pub fn new() -> Self { + NullWriter {} + } +} + +impl Write for NullWriter { + /// Discard all data. + /// + /// # Arguments + /// * `buf` - The data to write. + /// + /// # Returns + /// The number of bytes written. Always the same as the length of `buf`. + /// + /// # Errors + /// Never + fn write(&mut self, buf: &[u8]) -> std::io::Result {Ok(buf.len())} + + /// Flush the writer. + /// + /// # Errors + /// Never + fn flush(&mut self) -> std::io::Result<()> {Ok(())} +} + +/// Utility functions for the main function of `backup-deduplicator`. +pub mod main { + use std::env; + use std::path::PathBuf; + use crate::utils::LexicalAbsolute; + + /// Changes the working directory to the given path. + /// + /// # Arguments + /// * `working_directory` - The new working directory. + /// + /// # Returns + /// The new working directory. + /// + /// # Exit + /// Exits the process if the working directory could not be changed. 
+ pub fn change_working_directory(working_directory: Option) -> PathBuf { + match working_directory { + None => {}, + Some(working_directory) => { + env::set_current_dir(&working_directory).unwrap_or_else(|_| { + eprintln!("IO error, could not change working directory: {}", working_directory.display()); + std::process::exit(exitcode::CONFIG); + }); + } + } + + env::current_dir().unwrap_or_else(|_| { + eprintln!("IO error, could not resolve working directory"); + std::process::exit(exitcode::CONFIG); + }).canonicalize().unwrap_or_else(|_| { + eprintln!("IO error, could not resolve working directory"); + std::process::exit(exitcode::CONFIG); + }) + } + + /// Option how to parse a path. + /// + /// # See also + /// * [parse_path] + #[derive(Debug, Clone, Copy)] + pub enum ParsePathKind { + /// Do not post-process the path. + Direct, + /// Convert the path to a absolute path. The path must exist. + AbsoluteExisting, + /// Convert the path to a absolute path. The path might not exist. + AbsoluteNonExisting, + } + + /// Parse a path from a string. + /// + /// # Arguments + /// * `path` - The path to parse. + /// * `kind` - How to parse the path. + /// + /// # Returns + /// The parsed path. + pub fn parse_path(path: &str, kind: ParsePathKind) -> PathBuf { + let path = std::path::Path::new(path); + + let path = path.to_path_buf(); + + let path = match kind { + ParsePathKind::Direct => path, + ParsePathKind::AbsoluteExisting => to_lexical_absolute(path, true), + ParsePathKind::AbsoluteNonExisting => to_lexical_absolute(path, false), + }; + + path + } + + /// Convert a path to a absolute path. + /// + /// # Arguments + /// * `path` - The path to convert. + /// * `exists` - Whether the path must exist. + /// + /// # Returns + /// The absolute path. + /// + /// # Exit + /// Exits the process if the path could not be resolved. + pub fn to_lexical_absolute(path: PathBuf, exists: bool) -> PathBuf { + let path = match exists { + true => path.canonicalize(), + false => path.to_lexical_absolute(), + }; + + let path = match path{ + Ok(out) => out, + Err(e) => { + eprintln!("IO error, could not resolve output file: {:?}", e); + std::process::exit(exitcode::CONFIG); + } + }; + + path + } +} \ No newline at end of file