diff --git a/packages/ipfs-unixfs-importer/.aegir.js b/packages/ipfs-unixfs-importer/.aegir.js new file mode 100644 index 00000000..69129724 --- /dev/null +++ b/packages/ipfs-unixfs-importer/.aegir.js @@ -0,0 +1,7 @@ +'use strict' + +module.exports = { + karma: { + browserNoActivityTimeout: 500 * 1000 + } +} diff --git a/packages/ipfs-unixfs-importer/.gitignore b/packages/ipfs-unixfs-importer/.gitignore new file mode 100644 index 00000000..41396f3f --- /dev/null +++ b/packages/ipfs-unixfs-importer/.gitignore @@ -0,0 +1,45 @@ +docs +yarn.lock +**/node_modules/ +**/*.log +test/repo-tests* +**/bundle.js + +# Logs +logs +*.log + +coverage + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +.nyc_output + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +build + +# Dependency directory +# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git +node_modules + +lib +dist +test/test-data/go-ipfs-repo/LOCK +test/test-data/go-ipfs-repo/LOG +test/test-data/go-ipfs-repo/LOG.old + +# while testing npm5 +package-lock.json diff --git a/packages/ipfs-unixfs-importer/.npmignore b/packages/ipfs-unixfs-importer/.npmignore new file mode 100644 index 00000000..70ea7a67 --- /dev/null +++ b/packages/ipfs-unixfs-importer/.npmignore @@ -0,0 +1,33 @@ + +.DS_Store +tests/repo-tests* + +# Logs +logs +*.log + +# Runtime data +pids +*.pid +*.seed + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage + +# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (http://nodejs.org/api/addons.html) +build/Release + +# Dependency directory +# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git +node_modules + +test diff --git a/packages/ipfs-unixfs-importer/.travis.yml b/packages/ipfs-unixfs-importer/.travis.yml new file mode 100644 index 00000000..be3ad283 --- /dev/null +++ b/packages/ipfs-unixfs-importer/.travis.yml @@ -0,0 +1,40 @@ +language: node_js +cache: npm +stages: + - check + - test + - cov + +node_js: + - '12' + +os: + - linux + - osx + - windows + +script: npx nyc -s npm run test:node -- --bail +after_success: npx nyc report --reporter=text-lcov > coverage.lcov && npx codecov + +jobs: + include: + - stage: check + script: + - npx aegir commitlint --travis + - npx aegir dep-check + - npm run lint + + - stage: test + name: chrome + addons: + chrome: stable + script: npx aegir test -t browser -t webworker + + - stage: test + name: firefox + addons: + firefox: latest + script: npx aegir test -t browser -t webworker -- --browsers FirefoxHeadless + +notifications: + email: false diff --git a/packages/ipfs-unixfs-importer/CHANGELOG.md b/packages/ipfs-unixfs-importer/CHANGELOG.md new file mode 100644 index 00000000..bf0082ce --- /dev/null +++ b/packages/ipfs-unixfs-importer/CHANGELOG.md @@ -0,0 +1,928 @@ + +# [0.45.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.44.1...v0.45.0) (2020-02-04) + + +### Bug Fixes + +* only output unixfs things ([#49](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/49)) ([8ecdcf2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/8ecdcf2)) + + +### BREAKING CHANGES + +* If your data is 
below the chunk size, and you have `rawLeaves` and +`reduceSingleLeafToSelf` set to true, you'll get a CID that resolves +to a bona fide UnixFS file back with metadata and all that good +stuff instead of a `dag-raw` node. + + + + +## [0.44.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.44.0...v0.44.1) (2020-02-03) + + +### Performance Improvements + +* small bl ([#52](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/52)) ([3d461ce](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/3d461ce)) + + + + +# [0.44.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.43.1...v0.44.0) (2020-01-15) + + +### Features + +* allow overriding of internal functions ([#48](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/48)) ([0bff5f2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/0bff5f2)) + + + + +## [0.43.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.43.0...v0.43.1) (2020-01-09) + + +### Bug Fixes + +* specify default codec ([4b79619](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4b79619)) + + + + +# [0.43.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.42.0...v0.43.0) (2020-01-08) + + + +# [0.42.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.41.0...v0.42.0) (2019-11-27) + + +### Performance Improvements + +* avoid unnecessary buffer copy ([#40](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/40)) ([b5e5b5a](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/b5e5b5a15f8460c0effbedfd6aa39a1e594733df)) +* concurrent file import ([#41](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/41)) ([68ac8cc](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/68ac8cc233dbe73fcb8244911e09ed59789cddc9)), closes [#38](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/38) + + + + +# [0.41.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.40.0...v0.41.0) (2019-11-22) + + +### Features + +* support storing metadata in unixfs nodes ([#39](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/39)) ([a47c9ed](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/a47c9ed)) + + + +# [0.40.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.11...v0.40.0) (2019-08-05) + + +### Bug Fixes + +* update to newest IPLD libraries ([#37](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/37)) ([f79355f](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/f79355f)) + + + +## [0.39.11](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.10...v0.39.11) (2019-06-06) + + +### Bug Fixes + +* validate rabin args ([#32](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/32)) ([55c5dba](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/55c5dba)) + + + + +## [0.39.10](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.9...v0.39.10) (2019-06-04) + + +### Bug Fixes + +* remove unused dep ([efa2ca2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/efa2ca2)) + + +### Features + +* use a rabin chunker in wasm ([#31](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/31)) ([d4021db](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/d4021db)) + + + + +## [0.39.9](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.8...v0.39.9) (2019-05-24) + + +### Features + +* adds js implementation of rabin chunker for windows and browser ([#30](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/30)) ([542b3e4](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/542b3e4)) + + + + +## 
[0.39.8](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.7...v0.39.8) (2019-05-24) + + +### Bug Fixes + +* make trickle dag importer compatible with go ([#29](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/29)) ([01c7323](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/01c7323)) + + + + +## [0.39.7](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.6...v0.39.7) (2019-05-23) + + +### Bug Fixes + +* remove leftpad ([#28](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/28)) ([0aeb0f6](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/0aeb0f6)) + + + + +## [0.39.6](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.5...v0.39.6) (2019-05-20) + + +### Bug Fixes + +* final trickle dag tests ([#27](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/27)) ([72b8bc7](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/72b8bc7)) + + + + +## [0.39.5](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.4...v0.39.5) (2019-05-20) + + + + +## [0.39.4](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.3...v0.39.4) (2019-05-20) + + +### Bug Fixes + +* add missing dependency async-iterator-all ([#26](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/26)) ([83d4075](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/83d4075)) + + + + +## [0.39.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.2...v0.39.3) (2019-05-18) + + + + +## [0.39.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.1...v0.39.2) (2019-05-17) + + +### Bug Fixes + +* move async-iterator-first out of dev deps ([7b76f4b](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/7b76f4b)) + + + + +## [0.39.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.0...v0.39.1) (2019-05-17) + + + + +# [0.39.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.5...v0.39.0) (2019-05-17) + + +### Features + +* switch to async await ([#24](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/24)) ([2a40ecb](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/2a40ecb)) + + + + +## [0.38.5](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.4...v0.38.5) (2019-03-18) + + + + +## [0.38.4](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.3...v0.38.4) (2019-01-18) + + + + +## [0.38.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.2...v0.38.3) (2019-01-16) + + +### Bug Fixes + +* increase test timeouts for sharding ([#18](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/18)) ([bc35f6f](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/bc35f6f)) + + + + +## [0.38.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.1...v0.38.2) (2019-01-14) + + + + +## [0.38.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.0...v0.38.1) (2019-01-14) + + +### Bug Fixes + +* turn non-function progress callback into a noop ([#16](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/16)) ([6d2c15d](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/6d2c15d)) + + + + +# [0.38.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.3...v0.38.0) (2019-01-04) + + +### Bug Fixes + +* pull-stream/throughs/through is not pull-through ([df0abfa](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/df0abfa)) + + +### Performance Improvements + +* do not create new buffers ([4ef5dbc](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4ef5dbc)) +* switch out pull-block for bl ([#12](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/12)) 
([4e5b618](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4e5b618)) +* write files in parallel chunks, use a through instead of a map ([6a86d55](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/6a86d55)) + + + + +## [0.37.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.2...v0.37.3) (2018-12-19) + + +### Bug Fixes + +* increase sharding timeouts ([69210b6](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/69210b6)) + + + + +## [0.37.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.1...v0.37.2) (2018-12-04) + + +### Bug Fixes + +* fix regex to match files with square brackets ([986f945](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/986f945)) + + + + +## [0.37.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.0...v0.37.1) (2018-12-03) + + +### Performance Improvements + +* deep require pull stream modules ([092b5b4](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/092b5b4)) + + + + +# [0.37.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.36.0...v0.37.0) (2018-11-26) + + +### Features + +* export hash function from sharding ([7e24107](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/7e24107)) + + + + +# [0.36.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.34.0...v0.36.0) (2018-11-23) + + +### Bug Fixes + +* support slashes in filenames ([3171fab](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/3171fab)) + + +### Features + +* split hamt out into separate module, closes [#1](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/1) ([bf216a9](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/bf216a9)) + + + + +# [0.34.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.33.0...v0.34.0) (2018-11-12) + + +### Bug Fixes + +* updates ipld-dag-pb dep to version without .cid properties ([aa61cce](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/aa61cce)) + + + + +# [0.33.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.8...v0.33.0) (2018-10-27) + + +### Bug Fixes + +* fixes [#230](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/230) by returning a through stream that emits the error instead of throwing it ([fdd8429](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/fdd8429)) + + + + +## [0.32.8](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.7...v0.32.8) (2018-10-25) + + + + +## [0.32.7](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.6...v0.32.7) (2018-10-12) + + +### Bug Fixes + +* return correct chunks of streams, fixes [#229](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/229) ([362c685](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/362c685)) +* skip rabin tests on windows ([ea9e3c3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ea9e3c3)) + + + + +## [0.32.6](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.5...v0.32.6) (2018-10-12) + + +### Bug Fixes + +* do not use cid property of DAGNodes just yet ([7a2a308](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7a2a308)) + + + + +## [0.32.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.4...v0.32.5) (2018-10-12) + + +### Bug Fixes + +* do not overwrite cid property of DAGNodes ([c2e38ae](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/c2e38ae)) +* make sure errors from unmarshalling are caught ([8b2335c](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8b2335c)) + + + + +## [0.32.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.3...v0.32.4) (2018-08-23) + + +### Bug Fixes + +* build & export 
interop with go-ipfs for small file raw leaves ([11885fa](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/11885fa)) + + + + +## [0.32.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.2...v0.32.3) (2018-08-21) + + +### Bug Fixes + +* import with CID version 1 ([6ef929d](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/6ef929d)) +* typo ([c5cb38b](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/c5cb38b)) + + + + +## [0.32.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.1...v0.32.2) (2018-08-11) + + +### Bug Fixes + +* make rabin an optional dependency ([bef3152](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/bef3152)) +* skip first hash algorithm as it is no longer valid ([0b84b76](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0b84b76)), closes [js-multihash#57](https://github.com/js-multihash/issues/57) + + + + +## [0.32.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.0...v0.32.1) (2018-08-08) + + +### Bug Fixes + +* do not emit empty buffers for non-empty files ([ccc4ad2](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ccc4ad2)) + + + + +# [0.32.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.3...v0.32.0) (2018-08-08) + + +### Features + +* **importer:** add rabin fingerprinting chunk algorithm ([83a5feb](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/83a5feb)), closes [ipfs/js-ipfs#1283](https://github.com/ipfs/js-ipfs/issues/1283) + + + + +## [0.31.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.2...v0.31.3) (2018-07-24) + + +### Bug Fixes + +* return cids from builder ([0d3d3d8](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0d3d3d8)) + + + + +## [0.31.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.1...v0.31.2) (2018-07-20) + + + + +## [0.31.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.0...v0.31.1) (2018-07-19) + + + + +# [0.31.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.30.1...v0.31.0) (2018-07-19) + + + + +## [0.30.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.30.0...v0.30.1) (2018-07-19) + + +### Features + +* support --raw-leaves ([7a29d83](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7a29d83)), closes [ipfs/js-ipfs#1432](https://github.com/ipfs/js-ipfs/issues/1432) + + + + +# [0.30.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.29.0...v0.30.0) (2018-06-12) + + + + +# [0.29.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.28.1...v0.29.0) (2018-04-23) + + + + +## [0.28.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.28.0...v0.28.1) (2018-04-12) + + + + +# [0.28.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.27.0...v0.28.0) (2018-04-10) + + + + +# [0.27.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.26.0...v0.27.0) (2018-03-27) + + +### Features + +* exporter - support slicing streams stored in deeply nested DAGs ([#208](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/208)) ([8568cd5](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8568cd5)) + + + + +# [0.26.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.25.0...v0.26.0) (2018-03-22) + + +### Features + +* Adds begin/end byte slices to exporter ([#207](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/207)) ([8e11d77](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8e11d77)) + + + + +# [0.25.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.4...v0.25.0) (2018-03-20) + + +### Features + +* Add reader to read files or part of files as streams 
([833accf](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/833accf)) + + + + +## [0.24.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.3...v0.24.4) (2018-02-27) + + +### Bug Fixes + +* use "ipld" instead of "ipld-resolver" ([f4de206](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/f4de206)) + + + + +## [0.24.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.2...v0.24.3) (2018-02-27) + + + + +## [0.24.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.1...v0.24.2) (2017-12-15) + + + + +## [0.24.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.0...v0.24.1) (2017-11-12) + + + + +# [0.24.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.23.1...v0.24.0) (2017-11-12) + + +### Features + +* exporter maxDepth ([#197](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/197)) ([211e4e3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/211e4e3)) + + + + +## [0.23.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.23.0...v0.23.1) (2017-11-10) + + +### Features + +* windows interop ([#195](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/195)) ([aa21ff3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/aa21ff3)) + + + + +# [0.23.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.5...v0.23.0) (2017-11-07) + + +### Features + +* Include hash field for exported files ([#191](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/191)) ([8b13957](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8b13957)) + + + + +## [0.22.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.4...v0.22.5) (2017-09-08) + + +### Features + +* Use passed cidVersion option when writing to storage ([#185](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/185)) ([0cd2d60](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0cd2d60)) + + + + +## [0.22.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.3...v0.22.4) (2017-09-08) + + +### Features + +* allow specify hash algorithm for large files ([#184](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/184)) ([69915da](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/69915da)) + + + + +## [0.22.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.2...v0.22.3) (2017-09-07) + + + + +## [0.22.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.1...v0.22.2) (2017-09-07) + + +### Features + +* Add `onlyHash` option ([#183](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/183)) ([7450a65](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7450a65)) +* adds call to progress bar function ([#179](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/179)) ([ac6f722](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ac6f722)) + + + + +## [0.22.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.0...v0.22.1) (2017-09-04) + + + + +# [0.22.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.21.0...v0.22.0) (2017-07-23) + + + + +# [0.21.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.20.0...v0.21.0) (2017-07-04) + + + + +# [0.20.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.2...v0.20.0) (2017-06-16) + + +### Features + +* subtree support ([#175](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/175)) ([16b788c](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/16b788c)) + + + + +## [0.19.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.1...v0.19.2) (2017-05-25) + + +### Bug Fixes + +* **package:** update cids to version 0.5.0 
([59d6d0a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/59d6d0a)) + + +### Features + +* dag-api direct support ([adaeb37](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/adaeb37)) + + + + +## [0.19.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.0...v0.19.1) (2017-03-29) + + +### Bug Fixes + +* adding a dir: leaf node gets replaced with dir if necessary ([1d682ec](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/1d682ec)) + + + + +# [0.19.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.18.0...v0.19.0) (2017-03-24) + + +### Bug Fixes + +* breaking the stack when importing ([993f746](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/993f746)) +* passing browser tests ([29b2740](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/29b2740)) +* using correct murmur3 codec name ([295d86e](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/295d86e)) +* using the new IPLD API ([a80f4d8](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/a80f4d8)) + + + + +# [0.18.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.17.0...v0.18.0) (2017-03-22) + + +### Bug Fixes + +* **package:** update ipld-dag-pb to version 0.10.0 ([#154](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/154)) ([304ff25](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/304ff25)) +* **package:** update pull-pause to version 0.0.1 ([#153](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/153)) ([4dd2143](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/4dd2143)) + + +### Features + +* upgrade to the next version of ipfs-block and blockservice ([0ca25b2](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0ca25b2)) + + + + +# [0.17.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.16.1...v0.17.0) (2017-02-08) + + +### Features + +* update to latest ipld-resolver ([#137](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/137)) ([211dfb6](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/211dfb6)) + + + + +## [0.16.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.16.0...v0.16.1) (2017-02-02) + + +### Bug Fixes + +* exporter: recurse correctly into subdirs ([#136](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/136)) ([69c0d04](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/69c0d04)) + + + + +# [0.16.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.4...v0.16.0) (2017-02-02) + + +### Bug Fixes + +* **package:** update is-ipfs to version 0.3.0 ([#134](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/134)) ([0063f9d](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0063f9d)) + + + + +## [0.15.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.3...v0.15.4) (2017-01-31) + + +### Bug Fixes + +* case for empty file ([#132](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/132)) ([fee55d1](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/fee55d1)) + + + + +## [0.15.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.2...v0.15.3) (2017-01-30) + + +### Bug Fixes + +* expect empty stream to not generate any nodes ([#131](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/131)) ([7b054b6](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7b054b6)) + + + + +## [0.15.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.1...v0.15.2) (2017-01-30) + + +### Bug Fixes + +* stop export visitor from trying to resolve leaf object ([#130](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/130)) ([651f113](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/651f113)) + + + + +## 
[0.15.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.0...v0.15.1) (2017-01-29) + + +### Bug Fixes + +* **package:** update cids to version 0.4.0 ([#122](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/122)) ([65a6759](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/65a6759)) + + + + +# [0.15.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.2...v0.15.0) (2017-01-11) + + + + +## [0.14.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.1...v0.14.2) (2016-12-13) + + + + +## [0.14.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.0...v0.14.1) (2016-12-08) + + + + +# [0.14.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.13.0...v0.14.0) (2016-11-24) + + +### Features + +* upgrade to latest dag-pb API ([#88](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/88)) ([51d1245](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/51d1245)) + + + + +# [0.13.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.12.0...v0.13.0) (2016-11-03) + + + + +# [0.12.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.4...v0.12.0) (2016-10-28) + + +### Bug Fixes + +* **exporter:** add some parallel fetching of blocks where possible ([43503d4](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/43503d4)) + + +### Features + +* migrate importer to use IPLD Resolver and the new IPLD format ([89c3602](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/89c3602)) + + + + +## [0.11.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.3...v0.11.4) (2016-09-11) + + +### Features + +* **exporter:** implement recursive file export ([68e09a7](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/68e09a7)) + + + + +## [0.11.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.2...v0.11.3) (2016-09-09) + + +### Features + +* **exporter:** return file sizes ([73cf78a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/73cf78a)) + + + + +## [0.11.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.1...v0.11.2) (2016-09-09) + + + + +## [0.11.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.0...v0.11.1) (2016-09-09) + + + + +# [0.11.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.2...v0.11.0) (2016-09-08) + + +### Bug Fixes + +* **tests:** ignore ordering ([f8d1b2a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/f8d1b2a)) + + + + +## [0.10.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.1...v0.10.2) (2016-08-09) + + + + +## [0.10.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.0...v0.10.1) (2016-08-09) + + + + +# [0.10.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.9.0...v0.10.0) (2016-06-28) + + + + +# [0.9.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.8.0...v0.9.0) (2016-05-27) + + + + +# [0.8.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.7.0...v0.8.0) (2016-05-21) + + + + +# [0.7.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.6.1...v0.7.0) (2016-05-21) + + + + +## [0.6.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.6.0...v0.6.1) (2016-05-05) + + + + +# [0.6.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.5.0...v0.6.0) (2016-05-03) + + + + +# [0.5.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.5...v0.5.0) (2016-04-26) + + + + +## [0.4.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.4...v0.4.5) (2016-04-24) + + + + +## [0.4.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.3...v0.4.4) (2016-04-24) + + + + +## 
[0.4.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.2...v0.4.3) (2016-04-24) + + +### Bug Fixes + +* clean up dependencies ([a3bee40](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/a3bee40)) +* **importer:** cleanup smaller issues ([eab17fe](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/eab17fe)) + + + + +## [0.4.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.1...v0.4.2) (2016-04-19) + + + + +## [0.4.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.0...v0.4.1) (2016-04-19) + + + + +# [0.4.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.3...v0.4.0) (2016-04-19) + + + + +## [0.3.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.2...v0.3.3) (2016-03-22) + + + + +## [0.3.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.1...v0.3.2) (2016-03-22) + + + + +## [0.3.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.0...v0.3.1) (2016-03-22) + + + + +# [0.3.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.2.0...v0.3.0) (2016-03-21) + + + + +# [0.2.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.1.0...v0.2.0) (2016-02-17) + + + + +# 0.1.0 (2016-02-12) + + + diff --git a/packages/ipfs-unixfs-importer/LICENSE b/packages/ipfs-unixfs-importer/LICENSE new file mode 100644 index 00000000..b7cf9f52 --- /dev/null +++ b/packages/ipfs-unixfs-importer/LICENSE @@ -0,0 +1,22 @@ +The MIT License (MIT) + +Copyright (c) 2016 David Dias + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+
diff --git a/packages/ipfs-unixfs-importer/README.md b/packages/ipfs-unixfs-importer/README.md
new file mode 100644
index 00000000..da32517c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/README.md
@@ -0,0 +1,186 @@
+# ipfs-unixfs-importer
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
+[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
+[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
+[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
+[![Build Status](https://flat.badgen.net/travis/ipfs/js-ipfs-unixfs-importer)](https://travis-ci.com/ipfs/js-ipfs-unixfs-importer)
+[![Codecov](https://codecov.io/gh/ipfs/js-ipfs-unixfs-importer/branch/master/graph/badge.svg)](https://codecov.io/gh/ipfs/js-ipfs-unixfs-importer)
+[![Dependency Status](https://david-dm.org/ipfs/js-ipfs-unixfs-importer.svg?style=flat-square)](https://david-dm.org/ipfs/js-ipfs-unixfs-importer)
+[![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/feross/standard)
+![](https://img.shields.io/badge/npm-%3E%3D3.0.0-orange.svg?style=flat-square)
+![](https://img.shields.io/badge/Node.js-%3E%3D8.0.0-orange.svg?style=flat-square)
+
+> JavaScript implementation of the layout and chunking mechanisms used by IPFS to handle Files
+
+## Lead Maintainer
+
+[Alex Potsides](https://github.com/achingbrain)
+
+## Table of Contents
+
+- [Install](#install)
+- [Usage](#usage)
+  - [Example](#example)
+  - [API](#api)
+    - [const import = importer(source, ipld [, options])](#const-import--importersource-ipld--options)
+- [Overriding internals](#overriding-internals)
+- [Contribute](#contribute)
+- [License](#license)
+
+## Install
+
+```
+> npm install ipfs-unixfs-importer
+```
+
+## Usage
+
+### Example
+
+Let's create a little directory to import:
+
+```sh
+> cd /tmp
+> mkdir foo
+> echo 'hello' > foo/bar
+> echo 'world' > foo/quux
+```
+
+And write the importing logic:
+
+```js
+const importer = require('ipfs-unixfs-importer')
+const fs = require('fs')
+
+// Import the files under /tmp/foo
+const source = [{
+  path: '/tmp/foo/bar',
+  content: fs.createReadStream('/tmp/foo/bar')
+}, {
+  path: '/tmp/foo/quux',
+  content: fs.createReadStream('/tmp/foo/quux')
+}]
+
+// You need to create and pass an ipld-resolver instance
+// https://github.com/ipld/js-ipld-resolver
+for await (const entry of importer(source, ipld)) {
+  console.info(entry)
+}
+```
+
+When run, metadata about the DAGNodes in the created tree is printed, ending with the root:
+
+```js
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp/foo/bar',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp/foo/quux',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp/foo',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+  cid: CID, // see https://github.com/multiformats/js-cid
+  path: 'tmp',
+  unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+```
+
+#### API
+
+```js
+const importer = require('ipfs-unixfs-importer')
+```
+
+#### const import = importer(source, ipld [, options])
+
+The `import` function returns an async iterator. It consumes a source async iterator
that yields objects of the form:
+
+```js
+{
+  path: 'a name',
+  content: (Buffer or iterator emitting Buffers),
+  mtime: (Number representing seconds since (positive) or before (negative) the Unix Epoch),
+  mode: (Number representing ugo-rwx, setuid, setguid and sticky bit)
+}
+```
+
+`import` will output file info objects as files are stored in IPFS. When a node's stats are emitted, that node is guaranteed to have been written.
+
+`ipld` is an instance of the [`IPLD Resolver`](https://github.com/ipld/js-ipld-resolver) or the [`js-ipfs` `dag api`](https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md).
+
+The input's file paths and directory structure will be preserved in the created [`dag-pb`](https://github.com/ipld/js-ipld-dag-pb) nodes.
+
+`options` is a JavaScript object that may include the following keys:
+
+- `wrap` (boolean, defaults to false): if true, a wrapping node will be created
+- `shardSplitThreshold` (positive integer, defaults to 1000): the number of directory entries above which we decide to use a sharding directory builder (instead of the default flat one)
+- `chunker` (string, defaults to `"fixed"`): the chunking strategy. Supports:
+  - `fixed`
+  - `rabin`
+- `avgChunkSize` (positive integer, defaults to `262144`): the average chunk size (rabin chunker only)
+- `minChunkSize` (positive integer): the minimum chunk size (rabin chunker only)
+- `maxChunkSize` (positive integer, defaults to `262144`): the maximum chunk size
+- `strategy` (string, defaults to `"balanced"`): the DAG builder strategy name. Supports:
+  - `flat`: flat list of chunks
+  - `balanced`: builds a balanced tree
+  - `trickle`: builds [a trickle tree](https://github.com/ipfs/specs/pull/57#issuecomment-265205384)
+- `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
+- `layerRepeat` (positive integer, defaults to 4): (only applicable to the `trickle` DAG builder strategy) the maximum repetition of parent nodes for each layer of the tree
+- `reduceSingleLeafToSelf` (boolean, defaults to `true`): optimization that, when a file's content fits in a single leaf node, uses that node as the file root instead of wrapping it in a parent node
+- `hamtHashFn` (async function(string) Buffer): a function that hashes file names to create HAMT shards
+- `hamtBucketBits` (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT
+- `progress` (function): a function that will be called with the byte length of chunks as a file is added to IPFS
+- `onlyHash` (boolean, defaults to false): only chunk and hash - do not write to disk
+- `hashAlg` (string): multihash hashing algorithm to use
+- `cidVersion` (integer, defaults to 0): the CID version to use when storing the data (storage keys are based on the CID, _including_ its version)
+- `rawLeaves` (boolean, defaults to false): when a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes
+- `leafType` (string, defaults to `'file'`): what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is `true`)
+- `blockWriteConcurrency` (positive integer, defaults to 10): how many blocks to hash and write to the block store concurrently. For small numbers of large files this should be high (e.g. 50).
+- `fileImportConcurrency` (number, defaults to 50): how many files to import concurrently. For large numbers of small files this should be high (e.g. 50).
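+
+For example, an import call combining a few of these options might look like this - a sketch rather than a complete program: it assumes an `ipld` instance is already available and that the example file path exists:
+
+```js
+const importer = require('ipfs-unixfs-importer')
+const fs = require('fs')
+
+const source = [{
+  path: 'example.txt',
+  content: fs.createReadStream('/tmp/example.txt')
+}]
+
+const options = {
+  strategy: 'trickle',
+  rawLeaves: true,
+  maxChunkSize: 262144,
+  wrap: true
+}
+
+// the wrapping directory is emitted last, so the final entry's cid is the root
+for await (const entry of importer(source, ipld, options)) {
+  console.info(entry.path, entry.cid.toString())
+}
+```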
+
+## Overriding internals
+
+Several aspects of the importer are overridable by specifying functions as part of the options object with these keys:
+
+- `chunkValidator` (function): Optional function that supports the signature `async function * (source, options)`
+  - This function takes input from the `content` field of imported entries and should yield `Buffer` objects constructed from that content, throwing an `Error` if the content cannot be converted
+- `chunker` (function): Optional function that supports the signature `async function * (source, options)` where `source` is an async generator and `options` is an options object
+  - It should yield `Buffer` objects.
+- `bufferImporter` (function): Optional function that supports the signature `async function * (entry, source, ipld, options)`
+  - This function should read `Buffer`s from `source` and persist them using `ipld.put` or similar
+  - `entry` is the `{ path, content }` entry, `source` is an async generator that yields Buffers
+  - It should yield functions that return a Promise that resolves to an object with the properties `{ cid, unixfs, size }` where `cid` is a [CID], `unixfs` is a [UnixFS] entry and `size` is a `Number` that represents the serialized size of the [IPLD] node that holds the buffer data.
+  - Values will be pulled from this generator in parallel - the amount of parallelisation is controlled by the `blockWriteConcurrency` option (default: 10)
+- `dagBuilder` (function): Optional function that supports the signature `async function * (source, ipld, options)`
+  - This function should read `{ path, content }` entries from `source` and turn them into DAGs
+  - It should yield a `function` that returns a `Promise` that resolves to `{ cid, path, unixfs, node }` where `cid` is a `CID`, `path` is a string, `unixfs` is a UnixFS entry and `node` is a `DAGNode`.
+  - Values will be pulled from this generator in parallel - the amount of parallelisation is controlled by the `fileImportConcurrency` option (default: 50)
+- `treeBuilder` (function): Optional function that supports the signature `async function * (source, ipld, options)`
+  - This function should read `{ cid, path, unixfs, node }` entries from `source` and place them in a directory structure
+  - It should yield an object with the properties `{ cid, path, unixfs, size }` where `cid` is a `CID`, `path` is a string, `unixfs` is a UnixFS entry and `size` is a `Number`.
+
+[ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
+[UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs
+[IPLD]: https://github.com/ipld/js-ipld
+[CID]: https://github.com/multiformats/js-cid
+
+## Contribute
+
+Feel free to join in. All welcome. Open an [issue](https://github.com/ipfs/js-ipfs-unixfs-importer/issues)!
+
+This repository falls under the IPFS [Code of Conduct](https://github.com/ipfs/community/blob/master/code-of-conduct.md).
+ +[![](https://cdn.rawgit.com/jbenet/contribute-ipfs-gif/master/img/contribute.gif)](https://github.com/ipfs/community/blob/master/contributing.md) + +## License + +[MIT](LICENSE) diff --git a/packages/ipfs-unixfs-importer/package.json b/packages/ipfs-unixfs-importer/package.json new file mode 100644 index 00000000..48524778 --- /dev/null +++ b/packages/ipfs-unixfs-importer/package.json @@ -0,0 +1,94 @@ +{ + "name": "ipfs-unixfs-importer", + "version": "0.45.0", + "description": "JavaScript implementation of the UnixFs importer used by IPFS", + "leadMaintainer": "Alex Potsides ", + "main": "src/index.js", + "browser": { + "fs": false + }, + "scripts": { + "test": "aegir test", + "test:node": "aegir test -t node", + "test:browser": "aegir test -t browser", + "test:webworker": "aegir test -t webworker", + "build": "aegir build", + "lint": "aegir lint", + "release": "aegir release", + "release-minor": "aegir release --type minor", + "release-major": "aegir release --type major", + "coverage": "nyc -s npm run test:node && nyc report --reporter=html", + "dep-check": "aegir dep-check" + }, + "repository": { + "type": "git", + "url": "git+https://github.com/ipfs/js-ipfs-unixfs-importer.git" + }, + "keywords": [ + "IPFS" + ], + "license": "MIT", + "bugs": { + "url": "https://github.com/ipfs/js-ipfs-unixfs-importer/issues" + }, + "engines": { + "node": ">=8.0.0", + "npm": ">=3.0.0" + }, + "homepage": "https://github.com/ipfs/js-ipfs-unixfs-importer#readme", + "devDependencies": { + "aegir": "^20.0.0", + "chai": "^4.2.0", + "cids": "~0.7.1", + "deep-extend": "~0.6.0", + "detect-node": "^2.0.4", + "dirty-chai": "^2.0.1", + "ipfs-unixfs-exporter": "^0.40.0", + "ipld": "^0.25.0", + "ipld-in-memory": "^3.0.0", + "it-buffer-stream": "^1.0.0", + "it-last": "^1.0.0", + "nyc": "^15.0.0", + "sinon": "^8.0.4" + }, + "dependencies": { + "bl": "^4.0.0", + "err-code": "^2.0.0", + "hamt-sharding": "^1.0.0", + "ipfs-unixfs": "^0.3.0", + "ipld-dag-pb": "^0.18.0", + "it-all": "^1.0.1", + "it-batch": "^1.0.3", + "it-first": "^1.0.1", + "it-parallel-batch": "^1.0.3", + "merge-options": "^2.0.0", + "multicodec": "^1.0.0", + "multihashing-async": "^0.8.0", + "rabin-wasm": "~0.0.8" + }, + "contributors": [ + "Alan Shaw ", + "Alan Shaw ", + "Alex Potsides ", + "Arpit Agarwal ", + "Bernard Mordan ", + "Dan Ordille ", + "David Dias ", + "Diogo Silva ", + "Francisco Baio Dias ", + "Friedel Ziegelmayer ", + "Greenkeeper ", + "Hugo Dias ", + "Hugo Dias ", + "Marcin Rataj ", + "Pedro Teixeira ", + "Richard Littauer ", + "Richard Schneider ", + "Stephen Whitmore ", + "Volker Mische ", + "greenkeeper[bot] ", + "jbenet ", + "nginnever ", + "ᴠɪᴄᴛᴏʀ ʙᴊᴇʟᴋʜᴏʟᴍ " + ] +} diff --git a/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js b/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js new file mode 100644 index 00000000..6cef6606 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js @@ -0,0 +1,39 @@ +'use strict' + +const BufferList = require('bl/BufferList') + +module.exports = async function * fixedSizeChunker (source, options) { + let bl = new BufferList() + let currentLength = 0 + let emitted = false + const maxChunkSize = options.maxChunkSize + + for await (const buffer of source) { + bl.append(buffer) + + currentLength += buffer.length + + while (currentLength >= maxChunkSize) { + yield bl.slice(0, maxChunkSize) + emitted = true + + // throw away consumed bytes + if (maxChunkSize === bl.length) { + bl = new BufferList() + currentLength = 0 + } else { + const newBl = new BufferList() + 
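+        // shallowSlice references the same underlying buffers rather than copying them,
+        // so the leftover bytes can be carried forward cheaply in a fresh BufferList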
newBl.append(bl.shallowSlice(maxChunkSize)) + bl = newBl + + // update our offset + currentLength -= maxChunkSize + } + } + } + + if (!emitted || currentLength) { + // return any remaining bytes or an empty buffer + yield bl.slice(0, currentLength) + } +} diff --git a/packages/ipfs-unixfs-importer/src/chunker/index.js b/packages/ipfs-unixfs-importer/src/chunker/index.js new file mode 100644 index 00000000..ec2c494b --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/chunker/index.js @@ -0,0 +1,18 @@ +'use strict' + +const errCode = require('err-code') + +const chunkers = { + fixed: require('../chunker/fixed-size'), + rabin: require('../chunker/rabin') +} + +module.exports = (type, source, options) => { + const chunker = chunkers[type] + + if (!chunker) { + throw errCode(new Error(`Unknkown chunker named ${type}`), 'ERR_UNKNOWN_CHUNKER') + } + + return chunker(source, options) +} diff --git a/packages/ipfs-unixfs-importer/src/chunker/rabin.js b/packages/ipfs-unixfs-importer/src/chunker/rabin.js new file mode 100644 index 00000000..6f1a0775 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/chunker/rabin.js @@ -0,0 +1,73 @@ +'use strict' + +const BufferList = require('bl/BufferList') +const { create } = require('rabin-wasm') +const errcode = require('err-code') + +module.exports = async function * rabinChunker (source, options) { + const rabin = jsRabin() + + let min, max, avg + + if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) { + avg = options.avgChunkSize + min = options.minChunkSize + max = options.maxChunkSize + } else if (!options.avgChunkSize) { + throw errcode(new Error('please specify an average chunk size'), 'ERR_INVALID_AVG_CHUNK_SIZE') + } else { + avg = options.avgChunkSize + min = avg / 3 + max = avg + (avg / 2) + } + + // validate min/max/avg in the same way as go + if (min < 16) { + throw errcode(new Error('rabin min must be greater than 16'), 'ERR_INVALID_MIN_CHUNK_SIZE') + } + + if (max < min) { + max = min + } + + if (avg < min) { + avg = min + } + + const sizepow = Math.floor(Math.log2(avg)) + + for await (const chunk of rabin(source, { + min: min, + max: max, + bits: sizepow, + window: options.window, + polynomial: options.polynomial + })) { + yield chunk + } +} + +const jsRabin = () => { + return async function * (source, options) { + const r = await create(options.bits, options.min, options.max, options.window) + const buffers = new BufferList() + + for await (const chunk of source) { + buffers.append(chunk) + + const sizes = r.fingerprint(chunk) + + for (let i = 0; i < sizes.length; i++) { + var size = sizes[i] + var buf = buffers.slice(0, size) + buffers.consume(size) + + yield buf + } + } + + if (buffers.length) { + yield buffers.slice(0) + } + } +} diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/dir.js b/packages/ipfs-unixfs-importer/src/dag-builder/dir.js new file mode 100644 index 00000000..42cce150 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/dir.js @@ -0,0 +1,28 @@ +'use strict' + +const UnixFS = require('ipfs-unixfs') +const persist = require('../utils/persist') +const { + DAGNode +} = require('ipld-dag-pb') + +const dirBuilder = async (item, ipld, options) => { + const unixfs = new UnixFS({ + type: 'directory', + mtime: item.mtime, + mode: item.mode + }) + + const node = new DAGNode(unixfs.marshal(), []) + const cid = await persist(node, ipld, options) + const path = item.path + + return { + cid, + path, + unixfs, + size: node.size + } +} + +module.exports = dirBuilder diff --git 
a/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js new file mode 100644 index 00000000..732f7f76 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js @@ -0,0 +1,23 @@ +'use strict' + +const batch = require('it-batch') + +async function * balanced (source, reduce, options) { + yield await reduceToParents(source, reduce, options) +} + +async function reduceToParents (source, reduce, options) { + const roots = [] + + for await (const chunked of batch(source, options.maxChildrenPerNode)) { + roots.push(await reduce(chunked)) + } + + if (roots.length > 1) { + return reduceToParents(roots, reduce, options) + } + + return roots[0] +} + +module.exports = balanced diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js new file mode 100644 index 00000000..88d89bde --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js @@ -0,0 +1,50 @@ +'use strict' + +const UnixFS = require('ipfs-unixfs') +const persist = require('../../utils/persist') +const { + DAGNode +} = require('ipld-dag-pb') + +async function * bufferImporter (file, source, ipld, options) { + for await (const buffer of source) { + yield async () => { + options.progress(buffer.length) + let node + let unixfs + let size + + const opts = { + ...options + } + + if (options.rawLeaves) { + node = buffer + size = buffer.length + + opts.codec = 'raw' + opts.cidVersion = 1 + } else { + unixfs = new UnixFS({ + type: options.leafType, + data: buffer, + mtime: file.mtime, + mode: file.mode + }) + + node = new DAGNode(unixfs.marshal()) + size = node.size + } + + const cid = await persist(node, ipld, opts) + + return { + cid: cid, + unixfs, + size + } + } + } +} + +module.exports = bufferImporter diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js new file mode 100644 index 00000000..1ac77ef6 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js @@ -0,0 +1,7 @@ +'use strict' + +const all = require('it-all') + +module.exports = async function * (source, reduce) { + yield await reduce(await all(source)) +} diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js new file mode 100644 index 00000000..f44c5e51 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js @@ -0,0 +1,147 @@ +'use strict' + +const errCode = require('err-code') +const UnixFS = require('ipfs-unixfs') +const persist = require('../../utils/persist') +const { + DAGNode, + DAGLink +} = require('ipld-dag-pb') +const all = require('it-all') +const parallelBatch = require('it-parallel-batch') +const mc = require('multicodec') + +const dagBuilders = { + flat: require('./flat'), + balanced: require('./balanced'), + trickle: require('./trickle') +} + +async function * buildFileBatch (file, source, ipld, options) { + let count = -1 + let previous + let bufferImporter + + if (typeof options.bufferImporter === 'function') { + bufferImporter = options.bufferImporter + } else { + bufferImporter = require('./buffer-importer') + } + + for await (const entry of parallelBatch(bufferImporter(file, source, ipld, options), options.blockWriteConcurrency)) { + count++ + + if (count === 0) { + previous = entry + continue + } else if (count === 1) { + yield previous + 
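+      // more than one entry has been seen, so this file will not be a single leaf -
+      // clear the held-back entry so it is not marked `single` and re-emitted below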
previous = null + } + + yield entry + } + + if (previous) { + previous.single = true + yield previous + } +} + +const reduce = (file, ipld, options) => { + return async function (leaves) { + if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) { + const leaf = leaves[0] + + if (leaf.cid.codec === 'raw') { + // only one leaf node which is a buffer + const buffer = await ipld.get(leaf.cid) + + leaf.unixfs = new UnixFS({ + type: 'file', + mtime: file.mtime, + mode: file.mode, + data: buffer + }) + + const node = new DAGNode(leaf.unixfs.marshal()) + + leaf.cid = await ipld.put(node, mc.DAG_PB, options) + leaf.size = node.size + } + + return { + cid: leaf.cid, + path: file.path, + unixfs: leaf.unixfs, + size: leaf.size + } + } + + // create a parent node and add all the leaves + const f = new UnixFS({ + type: 'file', + mtime: file.mtime, + mode: file.mode + }) + + const links = leaves + .filter(leaf => { + if (leaf.cid.codec === 'raw' && leaf.size) { + return true + } + + if (!leaf.unixfs.data && leaf.unixfs.fileSize()) { + return true + } + + return Boolean(leaf.unixfs.data.length) + }) + .map((leaf) => { + if (leaf.cid.codec === 'raw') { + // node is a leaf buffer + f.addBlockSize(leaf.size) + + return new DAGLink(leaf.name, leaf.size, leaf.cid) + } + + if (!leaf.unixfs.data) { + // node is an intermediate node + f.addBlockSize(leaf.unixfs.fileSize()) + } else { + // node is a unixfs 'file' leaf node + f.addBlockSize(leaf.unixfs.data.length) + } + + return new DAGLink(leaf.name, leaf.size, leaf.cid) + }) + + const node = new DAGNode(f.marshal(), links) + const cid = await persist(node, ipld, options) + + return { + cid, + path: file.path, + unixfs: f, + size: node.size + } + } +} + +const fileBuilder = async (file, source, ipld, options) => { + const dagBuilder = dagBuilders[options.strategy] + + if (!dagBuilder) { + throw errCode(new Error(`Unknown importer build strategy name: ${options.strategy}`), 'ERR_BAD_STRATEGY') + } + + const roots = await all(dagBuilder(buildFileBatch(file, source, ipld, options), reduce(file, ipld, options), options)) + + if (roots.length > 1) { + throw errCode(new Error('expected a maximum of 1 roots and got ' + roots.length), 'ETOOMANYROOTS') + } + + return roots[0] +} + +module.exports = fileBuilder diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js new file mode 100644 index 00000000..5149ff0b --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js @@ -0,0 +1,150 @@ +'use strict' + +const batch = require('it-batch') + +module.exports = function * trickleReduceToRoot (source, reduce, options) { + yield trickleStream(source, reduce, options) +} + +async function trickleStream (source, reduce, options) { + let root + let iteration = 0 + let maxDepth = 1 + let subTree = root = new Root(options.layerRepeat) + + for await (const layer of batch(source, options.maxChildrenPerNode)) { + if (subTree.isFull()) { + if (subTree !== root) { + root.addChild(await subTree.reduce(reduce)) + } + + if (iteration && iteration % options.layerRepeat === 0) { + maxDepth++ + } + + subTree = new SubTree(maxDepth, options.layerRepeat, iteration) + + iteration++ + } + + subTree.append(layer) + } + + if (subTree && subTree !== root) { + root.addChild(await subTree.reduce(reduce)) + } + + return root.reduce(reduce) +} + +class SubTree { + constructor (maxDepth, layerRepeat, iteration) { + this.maxDepth = maxDepth + this.layerRepeat = layerRepeat + 
this.currentDepth = 1 + this.iteration = iteration + + this.root = this.node = this.parent = { + children: [], + depth: this.currentDepth, + maxDepth, + maxChildren: (this.maxDepth - this.currentDepth) * this.layerRepeat + } + } + + isFull () { + if (!this.root.data) { + return false + } + + if (this.currentDepth < this.maxDepth && this.node.maxChildren) { + // can descend + this._addNextNodeToParent(this.node) + + return false + } + + // try to find new node from node.parent + const distantRelative = this._findParent(this.node, this.currentDepth) + + if (distantRelative) { + this._addNextNodeToParent(distantRelative) + + return false + } + + return true + } + + _addNextNodeToParent (parent) { + this.parent = parent + + // find site for new node + const nextNode = { + children: [], + depth: parent.depth + 1, + parent, + maxDepth: this.maxDepth, + maxChildren: Math.floor(parent.children.length / this.layerRepeat) * this.layerRepeat + } + + parent.children.push(nextNode) + + this.currentDepth = nextNode.depth + this.node = nextNode + } + + append (layer) { + this.node.data = layer + } + + reduce (reduce) { + return this._reduce(this.root, reduce) + } + + async _reduce (node, reduce) { + let children = [] + + if (node.children.length) { + children = await Promise.all( + node.children + .filter(child => child.data) + .map(child => this._reduce(child, reduce)) + ) + } + + return reduce(node.data.concat(children)) + } + + _findParent (node, depth) { + const parent = node.parent + + if (!parent || parent.depth === 0) { + return + } + + if (parent.children.length === parent.maxChildren || !parent.maxChildren) { + // this layer is full, may be able to traverse to a different branch + return this._findParent(parent, depth) + } + + return parent + } +} + +class Root extends SubTree { + constructor (layerRepeat) { + super(0, layerRepeat) + + this.root.depth = 0 + this.currentDepth = 1 + } + + addChild (child) { + this.root.children.push(child) + } + + reduce (reduce) { + return reduce(this.root.data.concat(this.root.children)) + } +} diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/index.js b/packages/ipfs-unixfs-importer/src/dag-builder/index.js new file mode 100644 index 00000000..a55888d4 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/index.js @@ -0,0 +1,58 @@ +'use strict' + +const dirBuilder = require('./dir') +const fileBuilder = require('./file') + +async function * dagBuilder (source, ipld, options) { + for await (const entry of source) { + if (entry.path) { + if (entry.path.substring(0, 2) === './') { + options.wrapWithDirectory = true + } + + entry.path = entry.path + .split('/') + .filter(path => path && path !== '.') + .join('/') + } + + if (entry.content) { + let source = entry.content + + // wrap in iterator if it is array-like or not an iterator + if ((!source[Symbol.asyncIterator] && !source[Symbol.iterator]) || source.length !== undefined) { + source = { + [Symbol.iterator]: function * () { + yield entry.content + } + } + } + + let chunker + + if (typeof options.chunker === 'function') { + chunker = options.chunker + } else if (options.chunker === 'rabin') { + chunker = require('../chunker/rabin') + } else { + chunker = require('../chunker/fixed-size') + } + + let chunkValidator + + if (typeof options.chunkValidator === 'function') { + chunkValidator = options.chunkValidator + } else { + chunkValidator = require('./validate-chunks') + } + + // item is a file + yield () => fileBuilder(entry, chunker(chunkValidator(source, options), options), ipld, options) 
+ } else { + // item is a directory + yield () => dirBuilder(entry, ipld, options) + } + } +} + +module.exports = dagBuilder diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js b/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js new file mode 100644 index 00000000..bf3037d3 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js @@ -0,0 +1,22 @@ +'use strict' + +const errCode = require('err-code') + +// make sure the content only emits buffer-a-likes +async function * validateChunks (source) { + for await (const content of source) { + if (content.length === undefined) { + throw errCode(new Error('Content was invalid'), 'ERR_INVALID_CONTENT') + } + + if (typeof content === 'string' || content instanceof String) { + yield Buffer.from(content, 'utf8') + } else if (Array.isArray(content)) { + yield Buffer.from(content) + } else { + yield content + } + } +} + +module.exports = validateChunks diff --git a/packages/ipfs-unixfs-importer/src/dir-flat.js b/packages/ipfs-unixfs-importer/src/dir-flat.js new file mode 100644 index 00000000..50866044 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dir-flat.js @@ -0,0 +1,92 @@ +'use strict' + +const { + DAGLink, + DAGNode +} = require('ipld-dag-pb') +const UnixFS = require('ipfs-unixfs') +const Dir = require('./dir') +const persist = require('./utils/persist') + +class DirFlat extends Dir { + constructor (props, options) { + super(props, options) + this._children = {} + } + + put (name, value) { + this.cid = undefined + this.size = undefined + + this._children[name] = value + } + + get (name) { + return this._children[name] + } + + childCount () { + return Object.keys(this._children).length + } + + directChildrenCount () { + return this.childCount() + } + + onlyChild () { + return this._children[Object.keys(this._children)[0]] + } + + * eachChildSeries () { + const keys = Object.keys(this._children) + + for (let i = 0; i < keys.length; i++) { + const key = keys[i] + + yield { + key: key, + child: this._children[key] + } + } + } + + async * flush (path, ipld) { + const children = Object.keys(this._children) + const links = [] + + for (let i = 0; i < children.length; i++) { + let child = this._children[children[i]] + + if (typeof child.flush === 'function') { + for await (const entry of child.flush(child.path, ipld)) { + child = entry + + yield child + } + } + + links.push(new DAGLink(children[i], child.size, child.cid)) + } + + const unixfs = new UnixFS({ + type: 'directory', + mtime: this.mtime, + mode: this.mode + }) + + const node = new DAGNode(unixfs.marshal(), links) + const cid = await persist(node, ipld, this.options) + + this.cid = cid + this.size = node.size + + yield { + cid, + unixfs, + path, + size: node.size + } + } +} + +module.exports = DirFlat diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.js b/packages/ipfs-unixfs-importer/src/dir-sharded.js new file mode 100644 index 00000000..e2959845 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/dir-sharded.js @@ -0,0 +1,162 @@ +'use strict' + +const { + DAGLink, + DAGNode +} = require('ipld-dag-pb') +const UnixFS = require('ipfs-unixfs') +const multihashing = require('multihashing-async') +const Dir = require('./dir') +const persist = require('./utils/persist') +const Bucket = require('hamt-sharding') +const mergeOptions = require('merge-options').bind({ ignoreUndefined: true }) + +const hashFn = async function (value) { + const hash = await multihashing(Buffer.from(value, 'utf8'), 'murmur3-128') + + // 
Multihashing inserts a preamble of 2 bytes. Remove it.
+  // Also, murmur3 outputs 128 bits but, accidentally, IPFS Go's
+  // implementation only uses the first 64, so we must do the same
+  // for parity.
+  const justHash = hash.slice(2, 10)
+  const length = justHash.length
+  const result = Buffer.alloc(length)
+  // TODO: invert buffer because that's how Go impl does it
+  for (let i = 0; i < length; i++) {
+    result[length - i - 1] = justHash[i]
+  }
+
+  return result
+}
+hashFn.code = 0x22 // TODO: get this from multihashing-async?
+
+const defaultOptions = {
+  hamtHashFn: hashFn,
+  hamtBucketBits: 8
+}
+
+class DirSharded extends Dir {
+  constructor (props, options) {
+    options = mergeOptions(defaultOptions, options)
+
+    super(props, options)
+
+    this._bucket = Bucket({
+      hashFn: options.hamtHashFn,
+      bits: options.hamtBucketBits
+    })
+  }
+
+  async put (name, value) {
+    await this._bucket.put(name, value)
+  }
+
+  get (name) {
+    return this._bucket.get(name)
+  }
+
+  childCount () {
+    return this._bucket.leafCount()
+  }
+
+  directChildrenCount () {
+    return this._bucket.childrenCount()
+  }
+
+  onlyChild () {
+    return this._bucket.onlyChild()
+  }
+
+  async * eachChildSeries () {
+    for await (const { key, value } of this._bucket.eachLeafSeries()) {
+      yield {
+        key,
+        child: value
+      }
+    }
+  }
+
+  async * flush (path, ipld) {
+    for await (const entry of flush(path, this._bucket, ipld, this, this.options)) {
+      yield entry
+    }
+  }
+}
+
+module.exports = DirSharded
+
+module.exports.hashFn = hashFn
+
+async function * flush (path, bucket, ipld, shardRoot, options) {
+  const children = bucket._children
+  const links = []
+
+  for (let i = 0; i < children.length; i++) {
+    const child = children.get(i)
+
+    if (!child) {
+      continue
+    }
+
+    const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0')
+
+    if (Bucket.isBucket(child)) {
+      let shard
+
+      for await (const subShard of await flush('', child, ipld, null, options)) {
+        shard = subShard
+      }
+
+      links.push(new DAGLink(labelPrefix, shard.size, shard.cid))
+    } else if (typeof child.value.flush === 'function') {
+      const dir = child.value
+      let flushedDir
+
+      for await (const entry of dir.flush(dir.path, ipld)) {
+        flushedDir = entry
+
+        yield flushedDir
+      }
+
+      const label = labelPrefix + child.key
+      links.push(new DAGLink(label, flushedDir.size, flushedDir.cid))
+    } else {
+      const value = child.value
+
+      if (!value.node) {
+        if (value.cid) {
+          value.node = await ipld.get(value.cid)
+        } else {
+          continue
+        }
+      }
+
+      const label = labelPrefix + child.key
+      const size = value.node.length || value.node.size || value.node.Size
+
+      links.push(new DAGLink(label, size, value.cid))
+    }
+  }
+
+  // go-ipfs uses little endian, that's why we have to
+  // reverse the bit field before storing it
+  const data = Buffer.from(children.bitField().reverse())
+  const dir = new UnixFS({
+    type: 'hamt-sharded-directory',
+    data,
+    fanout: bucket.tableSize(),
+    hashType: options.hamtHashFn.code,
+    mtime: shardRoot && shardRoot.mtime,
+    mode: shardRoot && shardRoot.mode
+  })
+
+  const node = new DAGNode(dir.marshal(), links)
+  const cid = await persist(node, ipld, options)
+
+  yield {
+    cid,
+    unixfs: dir,
+    path,
+    size: node.size
+  }
+}
diff --git a/packages/ipfs-unixfs-importer/src/dir.js b/packages/ipfs-unixfs-importer/src/dir.js
new file mode 100644
index 00000000..24a1023c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dir.js
@@ -0,0 +1,8 @@
+'use strict'
+
+module.exports = class Dir {
+  constructor (props, options) {
+    this.options = options || {}
+
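+    // props holds the tree-building state for this entry - callers such as
+    // tree-builder.js and flat-to-shard.js pass things like root, dir, parent,
+    // parentKey, path, dirty, flat, mtime and mode, all copied onto the
+    // instance below
+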
Object.assign(this, props) + } +} diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.js b/packages/ipfs-unixfs-importer/src/flat-to-shard.js new file mode 100644 index 00000000..1617b4dc --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.js @@ -0,0 +1,47 @@ +'use strict' + +const DirSharded = require('./dir-sharded') + +module.exports = async function flatToShard (child, dir, threshold, options) { + let newDir = dir + + if (dir.flat && dir.directChildrenCount() >= threshold) { + newDir = await convertToShard(dir, options) + } + + const parent = newDir.parent + + if (parent) { + if (newDir !== dir) { + if (child) { + child.parent = newDir + } + + await parent.put(newDir.parentKey, newDir) + } + + return flatToShard(newDir, parent, threshold, options) + } + + return newDir +} + +async function convertToShard (oldDir, options) { + const newDir = new DirSharded({ + root: oldDir.root, + dir: true, + parent: oldDir.parent, + parentKey: oldDir.parentKey, + path: oldDir.path, + dirty: oldDir.dirty, + flat: false, + mtime: oldDir.mtime, + mode: oldDir.mode + }, options) + + for await (const { key, child } of oldDir.eachChildSeries()) { + await newDir.put(key, child) + } + + return newDir +} diff --git a/packages/ipfs-unixfs-importer/src/index.js b/packages/ipfs-unixfs-importer/src/index.js new file mode 100644 index 00000000..052acff3 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/index.js @@ -0,0 +1,85 @@ +'use strict' + +const parallelBatch = require('it-parallel-batch') +const mergeOptions = require('merge-options').bind({ ignoreUndefined: true }) + +const defaultOptions = { + chunker: 'fixed', + strategy: 'balanced', // 'flat', 'trickle' + rawLeaves: false, + onlyHash: false, + reduceSingleLeafToSelf: true, + codec: 'dag-pb', + hashAlg: 'sha2-256', + leafType: 'file', // 'raw' + cidVersion: 0, + progress: () => () => {}, + shardSplitThreshold: 1000, + fileImportConcurrency: 50, + blockWriteConcurrency: 10, + minChunkSize: 262144, + maxChunkSize: 262144, + avgChunkSize: 262144, + window: 16, + polynomial: 17437180132763653, // https://github.com/ipfs/go-ipfs-chunker/blob/d0125832512163708c0804a3cda060e21acddae4/rabin.go#L11 + maxChildrenPerNode: 174, + layerRepeat: 4, + wrapWithDirectory: false, + pin: true, + recursive: false, + hidden: false, + preload: true, + chunkValidator: null, + importBuffer: null +} + +module.exports = async function * (source, ipld, options = {}) { + const opts = mergeOptions(defaultOptions, options) + + if (options.cidVersion > 0 && options.rawLeaves === undefined) { + // if the cid version is 1 or above, use raw leaves as this is + // what go does. + opts.rawLeaves = true + } + + if (options.hashAlg !== undefined && options.rawLeaves === undefined) { + // if a non-default hash alg has been specified, use raw leaves as this is + // what go does. 
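+    // (utils/persist.js below will additionally bump the CID version to 1
+    // for any hash algorithm other than sha2-256)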
+    opts.rawLeaves = true
+  }
+
+  // go-ipfs trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves
+  if (options.strategy === 'trickle') {
+    opts.leafType = 'raw'
+    opts.reduceSingleLeafToSelf = false
+  }
+
+  if (options.format) {
+    opts.codec = options.format
+  }
+
+  let dagBuilder
+
+  if (typeof options.dagBuilder === 'function') {
+    dagBuilder = options.dagBuilder
+  } else {
+    dagBuilder = require('./dag-builder')
+  }
+
+  let treeBuilder
+
+  if (typeof options.treeBuilder === 'function') {
+    treeBuilder = options.treeBuilder
+  } else {
+    treeBuilder = require('./tree-builder')
+  }
+
+  for await (const entry of treeBuilder(parallelBatch(dagBuilder(source, ipld, opts), opts.fileImportConcurrency), ipld, opts)) {
+    yield {
+      cid: entry.cid,
+      path: entry.path,
+      unixfs: entry.unixfs,
+      size: entry.size
+    }
+  }
+}
diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.js b/packages/ipfs-unixfs-importer/src/tree-builder.js
new file mode 100644
index 00000000..feb9f42d
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/tree-builder.js
@@ -0,0 +1,100 @@
+'use strict'
+
+const DirFlat = require('./dir-flat')
+const flatToShard = require('./flat-to-shard')
+const Dir = require('./dir')
+const toPathComponents = require('./utils/to-path-components')
+const errCode = require('err-code')
+const first = require('it-first')
+
+async function addToTree (elem, tree, options) {
+  const pathElems = toPathComponents(elem.path || '')
+  const lastIndex = pathElems.length - 1
+  let parent = tree
+  let currentPath = ''
+
+  for (let i = 0; i < pathElems.length; i++) {
+    const pathElem = pathElems[i]
+
+    currentPath += `${currentPath ? '/' : ''}${pathElem}`
+
+    const last = (i === lastIndex)
+    parent.dirty = true
+    parent.cid = null
+    parent.size = null
+
+    if (last) {
+      await parent.put(pathElem, elem)
+      tree = await flatToShard(null, parent, options.shardSplitThreshold, options)
+    } else {
+      let dir = await parent.get(pathElem)
+
+      if (!dir || !(dir instanceof Dir)) {
+        dir = new DirFlat({
+          dir: true,
+          parent: parent,
+          parentKey: pathElem,
+          path: currentPath,
+          dirty: true,
+          flat: true,
+          mtime: dir && dir.unixfs && dir.unixfs.mtime,
+          mode: dir && dir.unixfs && dir.unixfs.mode
+        }, options)
+      }
+
+      await parent.put(pathElem, dir)
+
+      parent = dir
+    }
+  }
+
+  return tree
+}
+
+async function * treeBuilder (source, ipld, options) {
+  let tree = new DirFlat({
+    root: true,
+    dir: true,
+    path: '',
+    dirty: true,
+    flat: true
+  }, options)
+
+  for await (const entry of source) {
+    if (!entry) {
+      continue
+    }
+
+    tree = await addToTree(entry, tree, options)
+
+    if (!entry.unixfs || !entry.unixfs.isDirectory()) {
+      yield entry
+    }
+  }
+
+  if (!options.wrapWithDirectory) {
+    if (tree.childCount() > 1) {
+      throw errCode(new Error('detected more than one root'), 'ERR_MORE_THAN_ONE_ROOT')
+    }
+
+    const unwrapped = await first(tree.eachChildSeries())
+
+    if (!unwrapped) {
+      return
+    }
+
+    tree = unwrapped.child
+  }
+
+  if (!(tree instanceof Dir)) {
+    if (tree && tree.unixfs && tree.unixfs.isDirectory()) {
+      yield tree
+    }
+
+    return
+  }
+
+  yield * tree.flush(tree.path, ipld)
+}
+
+module.exports = treeBuilder
diff --git a/packages/ipfs-unixfs-importer/src/utils/persist.js b/packages/ipfs-unixfs-importer/src/utils/persist.js
new file mode 100644
index 00000000..e6970b65
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/utils/persist.js
@@ -0,0 +1,33 @@
+'use strict'
+
+const mh = require('multihashing-async').multihash
+const mc = require('multicodec')
+
+const persist = (node,
ipld, options) => { + if (!options.codec && node.length) { + options.cidVersion = 1 + options.codec = 'raw' + } + + if (!options.codec) { + options.codec = 'dag-pb' + } + + if (isNaN(options.hashAlg)) { + options.hashAlg = mh.names[options.hashAlg] + } + + if (options.hashAlg !== mh.names['sha2-256']) { + options.cidVersion = 1 + } + + if (options.format) { + options.codec = options.format + } + + const format = mc[options.codec.toUpperCase().replace(/-/g, '_')] + + return ipld.put(node, format, options) +} + +module.exports = persist diff --git a/packages/ipfs-unixfs-importer/src/utils/to-path-components.js b/packages/ipfs-unixfs-importer/src/utils/to-path-components.js new file mode 100644 index 00000000..5e826272 --- /dev/null +++ b/packages/ipfs-unixfs-importer/src/utils/to-path-components.js @@ -0,0 +1,11 @@ +'use strict' + +const toPathComponents = (path = '') => { + // split on / unless escaped with \ + return (path + .trim() + .match(/([^\\^/]|\\\/)+/g) || []) + .filter(Boolean) +} + +module.exports = toPathComponents diff --git a/packages/ipfs-unixfs-importer/test/benchmark.spec.js b/packages/ipfs-unixfs-importer/test/benchmark.spec.js new file mode 100644 index 00000000..fae3f483 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/benchmark.spec.js @@ -0,0 +1,70 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') + +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const bufferStream = require('it-buffer-stream') +const all = require('it-all') + +const REPEATS = 10 +const FILE_SIZE = Math.pow(2, 20) * 500 // 500MB +const CHUNK_SIZE = 65536 + +describe.skip('benchmark', function () { + this.timeout(30 * 1000) + + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + const times = [] + + after(() => { + console.info('Percent\tms') // eslint-disable-line no-console + times.forEach((time, index) => { + console.info(`${index}\t${parseInt(time / REPEATS)}`) // eslint-disable-line no-console + }) + }) + + for (let i = 0; i < REPEATS; i++) { + it(`run ${i}`, async () => { // eslint-disable-line no-loop-func + this.timeout(0) + + const size = FILE_SIZE + let read = 0 + let lastDate = Date.now() + let lastPercent = 0 + + const options = { + progress: (prog) => { + read += prog + + const percent = parseInt((read / size) * 100) + + if (percent > lastPercent) { + times[percent] = (times[percent] || 0) + (Date.now() - lastDate) + + lastDate = Date.now() + lastPercent = percent + } + } + } + + const buf = Buffer.alloc(CHUNK_SIZE).fill(0) + + await all(importer([{ + path: '200Bytes.txt', + content: bufferStream(size, { + chunkSize: CHUNK_SIZE, + generator: () => { + return buf + } + }) + }], ipld, options)) + }) + } +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js b/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js new file mode 100644 index 00000000..17242a31 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js @@ -0,0 +1,70 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const builder = require('../src/dag-builder/file/balanced') +const all = require('it-all') + +function reduce (leaves) { + if (leaves.length > 1) { + return { children: leaves } + } else { + return leaves[0] + } +} + +const options = { + maxChildrenPerNode: 3 +} + +describe('builder: balanced', () => { + it('reduces one value into itself', async () => { + const source = [1] + + const result = await 
all(builder(source, reduce, options)) + + expect(result).to.deep.equal(source) + }) + + it('reduces 3 values into parent', async () => { + const source = [1, 2, 3] + + const result = await all(builder(source, reduce, options)) + + expect(result).to.deep.equal([{ + children: [1, 2, 3] + }]) + }) + + it('obeys max children per node', async () => { + const source = [1, 2, 3, 4] + + const result = await all(builder(source, reduce, options)) + + expect(result).to.deep.equal([{ + children: [{ + children: [1, 2, 3] + }, + 4 + ] + }]) + }) + + it('refolds 2 parent nodes', async () => { + const source = [1, 2, 3, 4, 5, 6, 7] + + const result = await all(builder(source, reduce, options)) + + expect(result).to.deep.equal([{ + children: [{ + children: [1, 2, 3] + }, { + children: [4, 5, 6] + }, + 7 + ] + }]) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js b/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js new file mode 100644 index 00000000..b52b07b8 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js @@ -0,0 +1,294 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const all = require('it-all') +const last = require('it-last') + +describe('builder: directory sharding', () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + describe('basic dirbuilder', () => { + it('yields a non-sharded dir', async () => { + const content = Buffer.from('i have the best bytes') + const nodes = await all(importer([{ + path: 'a/b', + content + }], ipld, { + shardSplitThreshold: Infinity // never shard + })) + + expect(nodes.length).to.equal(2) + + expect(nodes[0].path).to.equal('a/b') + expect(nodes[1].path).to.equal('a') + + const dirNode = await exporter(nodes[1].cid, ipld) + expect(dirNode.unixfs.type).to.equal('directory') + + const fileNode = await exporter(nodes[0].cid, ipld) + expect(fileNode.unixfs.type).to.equal('file') + expect(Buffer.concat(await all(fileNode.content()))).to.deep.equal(content) + }) + + it('yields a sharded dir', async () => { + const nodes = await all(importer([{ + path: 'a/b', + content: Buffer.from('i have the best bytes') + }], ipld, { + shardSplitThreshold: 0 // always shard + })) + + expect(nodes.length).to.equal(2) + expect(nodes[0].path).to.equal('a/b') + expect(nodes[1].path).to.equal('a') + + const node = await exporter(nodes[1].cid, ipld) + + expect(node.unixfs.type).to.equal('hamt-sharded-directory') + }) + + it('exporting unsharded hash results in the correct files', async () => { + const content = 'i have the best bytes' + const nodes = await all(importer([{ + path: 'a/b', + content: Buffer.from(content) + }], ipld, { + shardSplitThreshold: Infinity // never shard + })) + + const nonShardedHash = nodes[1].cid + + const dir = await exporter(nonShardedHash, ipld) + const files = await all(dir.content()) + + expect(files.length).to.equal(1) + + const expectedHash = nonShardedHash.toBaseEncodedString() + + expect(dir.path).to.be.eql(expectedHash) + expect(dir.cid.toBaseEncodedString()).to.be.eql(expectedHash) + expect(files[0].path).to.be.eql(expectedHash + '/b') + expect(files[0].unixfs.fileSize()).to.be.eql(content.length) + + const fileContent = Buffer.concat(await all(files[0].content())) + + 
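+      // the exporter names the root entry after the base-encoded CID it was
+      // asked to resolve (hence the expectedHash comparisons above) and the
+      // exported bytes should round-trip unchanged
+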
expect(fileContent.toString()).to.equal(content) + }) + + it('exporting sharded hash results in the correct files', async () => { + const content = 'i have the best bytes' + const nodes = await all(importer([{ + path: 'a/b', + content: Buffer.from(content) + }], ipld, { + shardSplitThreshold: 0 // always shard + })) + + const shardedHash = nodes[1].cid + + const dir = await exporter(shardedHash, ipld) + const files = await all(dir.content()) + + expect(files.length).to.equal(1) + + const expectedHash = shardedHash.toBaseEncodedString() + + expect(dir.path).to.be.eql(expectedHash) + expect(dir.cid.toBaseEncodedString()).to.be.eql(expectedHash) + expect(files[0].path).to.be.eql(expectedHash + '/b') + expect(files[0].unixfs.fileSize()).to.be.eql(content.length) + + const fileContent = Buffer.concat(await all(files[0].content())) + + expect(fileContent.toString()).to.equal(content) + }) + }) + + describe('big dir', function () { + this.timeout(30 * 1000) + + const maxDirs = 2000 + + it('imports a big dir', async () => { + const source = { + [Symbol.iterator]: function * () { + for (let i = 0; i < maxDirs; i++) { + yield { + path: 'big/' + i.toString().padStart(4, '0'), + content: Buffer.from(i.toString()) + } + } + } + } + + const nodes = await all(importer(source, ipld)) + + expect(nodes.length).to.equal(maxDirs + 1) + const last = nodes[nodes.length - 1] + expect(last.path).to.equal('big') + }) + + it('exports a big dir', async () => { + const source = { + [Symbol.iterator]: function * () { + for (let i = 0; i < maxDirs; i++) { + yield { + path: 'big/' + i.toString().padStart(4, '0'), + content: Buffer.from(i.toString()) + } + } + } + } + + const nodes = await all(importer(source, ipld)) + + expect(nodes.length).to.equal(maxDirs + 1) // files plus the containing directory + + const dir = await exporter(nodes[nodes.length - 1].cid, ipld) + + for await (const entry of dir.content()) { + const content = Buffer.concat(await all(entry.content())) + expect(content.toString()).to.equal(parseInt(entry.name, 10).toString()) + } + }) + }) + + describe('big nested dir', function () { + this.timeout(450 * 1000) + + const maxDirs = 2000 + const maxDepth = 3 + let rootHash + + before(async () => { + const source = { + [Symbol.iterator]: function * () { + let pending = maxDirs + let pendingDepth = maxDepth + let i = 0 + let depth = 1 + + while (pendingDepth && pending) { + i++ + const dir = [] + + for (let d = 0; d < depth; d++) { + dir.push('big') + } + + yield { + path: dir.concat(i.toString().padStart(4, '0')).join('/'), + content: Buffer.from(i.toString()) + } + + pending-- + if (!pending) { + pendingDepth-- + pending = maxDirs + i = 0 + depth++ + } + } + } + } + + const node = await last(importer(source, ipld)) + expect(node.path).to.equal('big') + + rootHash = node.cid + }) + + it('imports a big dir', async () => { + const dir = await exporter(rootHash, ipld) + + const verifyContent = async (node) => { + if (node.unixfs.type === 'file') { + const bufs = await all(node.content()) + const content = Buffer.concat(bufs) + expect(content.toString()).to.equal(parseInt(node.name, 10).toString()) + } else { + for await (const entry of node.content()) { + await verifyContent(entry) + } + } + } + + await verifyContent(dir) + }) + + it('exports a big dir', async () => { + const collectContent = async (node, entries = {}) => { + if (node.unixfs.type === 'file') { + entries[node.path] = { + content: Buffer.concat(await all(node.content())).toString() + } + } else { + entries[node.path] = node + + for await (const 
entry of node.content()) { + await collectContent(entry, entries) + } + } + + return entries + } + + const eachPath = (path) => { + if (!index) { + // first dir + if (depth === 1) { + expect(path).to.equal(dir.cid.toBaseEncodedString()) + } + + const entry = entries[path] + expect(entry).to.exist() + expect(entry.content).to.not.be.a('string') + } else { + // dir entries + const pathElements = path.split('/') + expect(pathElements.length).to.equal(depth + 1) + const lastElement = pathElements[pathElements.length - 1] + expect(lastElement).to.equal(index.toString().padStart(4, '0')) + expect(entries[path].content).to.equal(index.toString()) + } + index++ + if (index > maxDirs) { + index = 0 + depth++ + } + } + + const dir = await exporter(rootHash, ipld) + + const entries = await collectContent(dir) + let index = 0 + let depth = 1 + + const paths = Object.keys(entries).sort() + expect(paths.length).to.equal(maxDepth * maxDirs + maxDepth) + paths.forEach(eachPath) + }) + + it('exports a big dir with subpath', async () => { + const exportHash = rootHash.toBaseEncodedString() + '/big/big/2000' + + const node = await exporter(exportHash, ipld) + expect(node.path).to.equal(exportHash) + + const content = Buffer.concat(await all(node.content())) + expect(content.toString()).to.equal('2000') + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-flat.spec.js b/packages/ipfs-unixfs-importer/test/builder-flat.spec.js new file mode 100644 index 00000000..e3f0339e --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-flat.spec.js @@ -0,0 +1,32 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const builder = require('../src/dag-builder/file/flat') +const all = require('it-all') + +function reduce (leaves) { + if (leaves.length > 1) { + return { children: leaves } + } else { + return leaves[0] + } +} + +describe('builder: flat', () => { + it('reduces one value into itself', async () => { + const source = [1] + const result = await all(builder(source, reduce)) + + expect(result).to.be.eql([1]) + }) + + it('reduces 2 values into parent', async () => { + const source = [1, 2] + const result = await all(builder(source, reduce)) + + expect(result).to.be.eql([{ children: [1, 2] }]) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js b/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js new file mode 100644 index 00000000..e7e7642c --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js @@ -0,0 +1,47 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const builder = require('../src/dag-builder') +const all = require('it-all') + +describe('builder: onlyHash', () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('will only chunk and hash if passed an "onlyHash" option', async () => { + const nodes = await all(builder([{ + path: 'foo.txt', + content: Buffer.from([0, 1, 2, 3, 4]) + }], ipld, { + onlyHash: true, + chunker: 'fixed', + strategy: 'balanced', + progress: () => {}, + leafType: 'file', + reduceSingleLeafToSelf: true, + format: 'dag-pb', + hashAlg: 'sha2-256', + wrap: true, + maxChunkSize: 1024, + maxChildrenPerNode: 254 + })) + + expect(nodes.length).to.equal(1) + + try { + await ipld.get((await nodes[0]()).cid) + + throw new Error('Should have 
errored') + } catch (err) { + expect(err.code).to.equal('ERR_NOT_FOUND') + } + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js b/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js new file mode 100644 index 00000000..ba6c239a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js @@ -0,0 +1,574 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const builder = require('../src/dag-builder/file/trickle') +const all = require('it-all') + +const createValues = (max) => { + const output = [] + + for (let i = 0; i < max; i++) { + output.push(i) + } + + return output +} + +function reduce (leaves) { + if (leaves.length > 1) { + return { children: leaves } + } else { + return leaves[0] + } +} + +const options = { + maxChildrenPerNode: 3, + layerRepeat: 2 +} + +describe('builder: trickle', () => { + it('reduces one value into itself', async () => { + const result = await all(builder([1], reduce, options)) + + expect(result).to.deep.equal([1]) + }) + + it('reduces 3 values into parent', async () => { + const result = await all(builder(createValues(3), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2 + ] + }]) + }) + + it('reduces 6 values correctly', async () => { + const result = await all(builder(createValues(6), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + } + ] + }]) + }) + + it('reduces 9 values correctly', async () => { + const result = await all(builder(createValues(9), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + } + ] + }]) + }) + + it('reduces 12 values correctly', async () => { + const result = await all(builder(createValues(12), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11 + ] + } + ] + }]) + }) + + it('reduces 21 values correctly', async () => { + const result = await all(builder(createValues(21), reduce, options)) + + expect(result).to.deep.equal([{ + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11, + { + children: [ + 12, + 13, + 14 + ] + }, + { + children: [ + 15, + 16, + 17 + ] + } + ] + }, + { + children: [ + 18, + 19, + 20 + ] + } + ] + }]) + }) + + it('reduces 68 values correctly', async () => { + const result = await all(builder(createValues(68), reduce, options)) + + expect(result).to.deep.equal([ + { + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11, + { + children: [ + 12, + 13, + 14 + ] + }, + { + children: [ + 15, + 16, + 17 + ] + } + ] + }, + { + children: [ + 18, + 19, + 20, + { + children: [ + 21, + 22, + 23 + ] + }, + { + children: [ + 24, + 25, + 26 + ] + } + ] + }, + { + children: [ + 27, + 28, + 29, + { + children: [ + 30, + 31, + 32 + ] + }, + { + children: [ + 33, + 34, + 35 + ] + }, + { + children: [ + 36, + 37, + 38, + { + children: [ + 39, + 40, + 41 + ] + }, + { + children: [ + 42, + 43, + 44 + ] + } + ] + }, + { + children: [ + 45, + 46, + 47, + { + children: [ + 48, + 49, + 50 + ] + }, + { + children: [ + 51, + 52, + 53 + ] + } + ] + } + ] + }, 
+ { + children: [ + 54, + 55, + 56, + { + children: [ + 57, + 58, + 59 + ] + }, + { + children: [ + 60, + 61, + 62 + ] + }, + { + children: [ + 63, + 64, + 65, + { + children: [ + 66, + 67 + ] + } + ] + } + ] + } + ] + } + ]) + }) + + it('reduces 93 values correctly', async () => { + const result = await all(builder(createValues(93), reduce, options)) + + expect(result).to.deep.equal([ + { + children: [ + 0, + 1, + 2, + { + children: [ + 3, + 4, + 5 + ] + }, + { + children: [ + 6, + 7, + 8 + ] + }, + { + children: [ + 9, + 10, + 11, + { + children: [ + 12, + 13, + 14 + ] + }, + { + children: [ + 15, + 16, + 17 + ] + } + ] + }, + { + children: [ + 18, + 19, + 20, + { + children: [ + 21, + 22, + 23 + ] + }, + { + children: [ + 24, + 25, + 26 + ] + } + ] + }, + { + children: [ + 27, + 28, + 29, + { + children: [ + 30, + 31, + 32 + ] + }, + { + children: [ + 33, + 34, + 35 + ] + }, + { + children: [ + 36, + 37, + 38, + { + children: [ + 39, + 40, + 41 + ] + }, + { + children: [ + 42, + 43, + 44 + ] + } + ] + }, + { + children: [ + 45, + 46, + 47, + { + children: [ + 48, + 49, + 50 + ] + }, + { + children: [ + 51, + 52, + 53 + ] + } + ] + } + ] + }, + { + children: [ + 54, + 55, + 56, + { + children: [ + 57, + 58, + 59 + ] + }, + { + children: [ + 60, + 61, + 62 + ] + }, + { + children: [ + 63, + 64, + 65, + { + children: [ + 66, + 67, + 68 + ] + }, + { + children: [ + 69, + 70, + 71 + ] + } + ] + }, + { + children: [ + 72, + 73, + 74, + { + children: [ + 75, + 76, + 77 + ] + }, + { + children: [ + 78, + 79, + 80 + ] + } + ] + } + ] + }, + { + children: [ + 81, + 82, + 83, + { + children: [ + 84, + 85, + 86 + ] + }, + { + children: [ + 87, + 88, + 89 + ] + }, + { + children: [ + 90, + 91, + 92 + ] + } + ] + } + ] + } + ]) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/builder.spec.js b/packages/ipfs-unixfs-importer/test/builder.spec.js new file mode 100644 index 00000000..c9d5522d --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/builder.spec.js @@ -0,0 +1,108 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const mh = require('multihashing-async').multihash +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const UnixFS = require('ipfs-unixfs') +const builder = require('../src/dag-builder') +const first = require('it-first') + +describe('builder', () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + const testMultihashes = Object.keys(mh.names).slice(1, 40) + const opts = { + strategy: 'flat', + chunker: 'fixed', + leafType: 'file', + reduceSingleLeafToSelf: true, + format: 'dag-pb', + hashAlg: 'sha2-256', + progress: () => {}, + maxChunkSize: 262144 + } + + it('allows multihash hash algorithm to be specified', async () => { + for (let i = 0; i < testMultihashes.length; i++) { + const hashAlg = testMultihashes[i] + const options = { + ...opts, + hashAlg + } + const content = String(Math.random() + Date.now()) + const inputFile = { + path: content + '.txt', + content: Buffer.from(content) + } + + const imported = await (await first(builder([inputFile], ipld, options)))() + + expect(imported).to.exist() + + // Verify multihash has been encoded using hashAlg + expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg) + + // Fetch using hashAlg encoded multihash + const node = await ipld.get(imported.cid) + + const fetchedContent = UnixFS.unmarshal(node.Data).data + expect(fetchedContent).to.deep.equal(inputFile.content) + } + }) + + 
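+  // Note: the dag builder yields functions rather than import results - each
+  // one must be invoked to actually perform the import (hence the extra ()
+  // above). src/index.js relies on this to run file imports with bounded
+  // concurrency via it-parallel-batch and the fileImportConcurrency option.
+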
it('allows multihash hash algorithm to be specified for big file', async function () { + this.timeout(30000) + + for (let i = 0; i < testMultihashes.length; i++) { + const hashAlg = testMultihashes[i] + const options = { + ...opts, + hashAlg + } + const content = String(Math.random() + Date.now()) + const inputFile = { + path: content + '.txt', + // Bigger than maxChunkSize + content: Buffer.alloc(262144 + 5).fill(1) + } + + const imported = await (await first(builder([inputFile], ipld, options)))() + + expect(imported).to.exist() + expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg) + } + }) + + it('allows multihash hash algorithm to be specified for a directory', async () => { + for (let i = 0; i < testMultihashes.length; i++) { + const hashAlg = testMultihashes[i] + + const options = { + ...opts, + hashAlg + } + const inputFile = { + path: `${String(Math.random() + Date.now())}-dir`, + content: null + } + + const imported = await (await first(builder([Object.assign({}, inputFile)], ipld, options)))() + + expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg) + + // Fetch using hashAlg encoded multihash + const node = await ipld.get(imported.cid) + + const meta = UnixFS.unmarshal(node.Data) + expect(meta.type).to.equal('directory') + } + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js b/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js new file mode 100644 index 00000000..8bc80dda --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js @@ -0,0 +1,73 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const mc = require('multicodec') + +// eslint bug https://github.com/eslint/eslint/issues/12459 +// eslint-disable-next-line require-await +const iter = async function * () { + yield Buffer.from('one') + yield Buffer.from('two') +} + +describe('custom chunker', function () { + let inmem + + const fromPartsTest = (iter, size) => async () => { + for await (const part of importer([{ + content: iter() + }], inmem, { + chunkValidator: source => source, + chunker: source => source, + bufferImporter: async function * (file, source, ipld, options) { + for await (const item of source) { + yield () => Promise.resolve(item) + } + } + })) { + expect(part.size).to.equal(size) + } + } + + before(async () => { + inmem = await inMemory(IPLD) + }) + + it('keeps custom chunking', async () => { + const chunker = source => source + const content = iter() + for await (const part of importer([{ path: 'test', content }], inmem, { + chunker + })) { + expect(part.size).to.equal(116) + } + }) + + // eslint bug https://github.com/eslint/eslint/issues/12459 + const multi = async function * () { + yield { + size: 11, + cid: await inmem.put(Buffer.from('hello world'), mc.RAW) + } + yield { + size: 11, + cid: await inmem.put(Buffer.from('hello world'), mc.RAW) + } + } + it('works with multiple parts', fromPartsTest(multi, 120)) + + const single = async function * () { + yield { + size: 11, + cid: await inmem.put(Buffer.from('hello world'), mc.RAW) + } + } + it('works with single part', fromPartsTest(single, 19)) +}) diff --git a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js new file mode 100644 index 00000000..276702ab --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js @@ -0,0 +1,85 @@ +/* eslint-env mocha */ +'use strict' + +const chunker = require('../src/chunker/fixed-size') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const isNode = require('detect-node') +const all = require('it-all') +const loadFixture = require('aegir/fixtures') +const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt') + +describe('chunker: fixed size', function () { + this.timeout(30000) + + it('chunks non flat buffers', async () => { + const b1 = Buffer.alloc(2 * 256) + const b2 = Buffer.alloc(1 * 256) + const b3 = Buffer.alloc(5 * 256) + + b1.fill('a') + b2.fill('b') + b3.fill('c') + + const chunks = await all(chunker([b1, b2, b3], { + maxChunkSize: 256 + })) + + expect(chunks).to.have.length(8) + chunks.forEach((chunk) => { + expect(chunk).to.have.length(256) + }) + }) + + it('256 Bytes chunks', async () => { + const input = [] + const buf = Buffer.from('a') + + for (let i = 0; i < (256 * 12); i++) { + input.push(buf) + } + const chunks = await all(chunker(input, { + maxChunkSize: 256 + })) + + expect(chunks).to.have.length(12) + chunks.forEach((chunk) => { + expect(chunk).to.have.length(256) + }) + }) + + it('256 KiB chunks', async () => { + const KiB256 = 262144 + const chunks = await all(chunker([rawFile], { + maxChunkSize: KiB256 + })) + + expect(chunks).to.have.length(4) + chunks.forEach((chunk) => { + expect(chunk).to.have.length(KiB256) + }) + }) + + it('256 KiB chunks of non scalar filesize', async () => { + const KiB256 = 262144 + const file = Buffer.concat([rawFile, Buffer.from('hello')]) + + const chunks = await all(chunker([file], { + maxChunkSize: KiB256 + })) + + expect(chunks).to.have.length(5) + let counter = 0 + + chunks.forEach((chunk) => { + if (chunk.length < KiB256) { + counter++ + } else { + expect(chunk).to.have.length(KiB256) + } + }) + + expect(counter).to.equal(1) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js new file mode 100644 index 00000000..9f9a4aff --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js @@ -0,0 +1,135 @@ +/* eslint-env mocha */ +'use strict' + +const chunker = require('../src/chunker/rabin') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const loadFixture = require('aegir/fixtures') +const isNode = require('detect-node') +const all = require('it-all') + +const rawFile = loadFixture((isNode ? 
__dirname : 'test') + '/fixtures/1MiB.txt') + +describe('chunker: rabin', function () { + this.timeout(30000) + + const defaultOptions = { + avgChunkSize: 262144, + window: 64, + polynomial: 17437180132763653 + } + + it('chunks non flat buffers', async () => { + const b1 = Buffer.alloc(2 * 256) + const b2 = Buffer.alloc(1 * 256) + const b3 = Buffer.alloc(5 * 256) + + b1.fill('a') + b2.fill('b') + b3.fill('c') + + const chunks = await all(chunker([b1, b2, b3], { + ...defaultOptions, + minChunkSize: 48, + avgChunkSize: 96, + maxChunkSize: 192 + })) + + const size = chunks.reduce((acc, curr) => acc + curr.length, 0) + + expect(size).to.equal(b1.length + b2.length + b3.length) + + chunks.forEach((chunk, index) => { + if (index === chunks.length - 1) { + expect(chunk.length).to.equal(128) + } else { + expect(chunk.length).to.equal(192) + } + }) + }) + + it('uses default min and max chunk size when only avgChunkSize is specified', async () => { + const b1 = Buffer.alloc(10 * 256) + b1.fill('a') + + const chunks = await all(chunker([b1], { + ...defaultOptions, + maxChunkSize: 262144, + minChunkSize: 18, + avgChunkSize: 256 + })) + + chunks.forEach((chunk) => { + expect(chunk).to.have.length.gte(256 / 3) + expect(chunk).to.have.length.lte(256 * (256 / 2)) + }) + }) + + it('256 KiB avg chunks of non scalar filesize', async () => { + const KiB256 = 262144 + const file = Buffer.concat([rawFile, Buffer.from('hello')]) + const opts = { + ...defaultOptions, + minChunkSize: KiB256 / 3, + avgChunkSize: KiB256, + maxChunkSize: KiB256 + (KiB256 / 2) + } + + const chunks = await all(chunker([file], opts)) + + chunks.forEach((chunk) => { + expect(chunk).to.have.length.gte(opts.minChunkSize) + expect(chunk).to.have.length.lte(opts.maxChunkSize) + }) + }) + + it('throws when min chunk size is too small', async () => { + const opts = { + ...defaultOptions, + minChunkSize: 1, + maxChunkSize: 100 + } + + try { + await all(chunker([], opts)) + throw new Error('Should have thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_MIN_CHUNK_SIZE') + } + }) + + it('throws when avg chunk size is not specified', async () => { + const opts = { + ...defaultOptions, + avgChunkSize: undefined + } + + try { + await all(chunker([], opts)) + throw new Error('Should have thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_AVG_CHUNK_SIZE') + } + }) + + it('uses the min chunk size when max and avg are too small', async () => { + const file = Buffer.concat([rawFile, Buffer.from('hello')]) + const opts = { + ...defaultOptions, + minChunkSize: 100, + maxChunkSize: 5, + avgChunkSize: 5 + } + + const chunks = await all(chunker([file], opts)) + + chunks.forEach((chunk, index) => { + if (index === chunks.length - 1) { + expect(chunk.length).to.equal(81) + } else { + expect(chunk.length).to.equal(100) + } + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block new file mode 100644 index 00000000..f57749f0 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 new file mode 
100644 index 00000000..a6e00f34 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 new file mode 100644 index 00000000..f4c039c2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 new file mode 100644 index 00000000..64ce0aeb Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 new file mode 100644 index 00000000..c1f9899a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 new file mode 100644 index 00000000..cbd601a6 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file new file mode 100644 index 00000000..e7229e0e --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file @@ -0,0 +1 @@ +L     \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 new file mode 100644 index 00000000..36ff3333 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 new file mode 100644 index 00000000..fa626274 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 new file mode 100644 index 00000000..f7ea5c2e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 new file mode 100644 index 00000000..de99ffe5 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 new file mode 100644 index 00000000..0e438a15 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt new file mode 100644 index 00000000..60770c23 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt new file mode 100644 
index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block new file mode 100644 index 00000000..a655cf83 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block @@ -0,0 +1,5 @@ + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file new file mode 100644 index 00000000..b93a6da8 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block new file mode 100644 index 00000000..ce734230 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block @@ -0,0 +1,4 @@ +4 +" si"¹W%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block new file mode 100644 index 00000000..5accb645 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block @@ -0,0 +1,3 @@ +5 +" $G,A4{xZ/.D` 200Bytes.txt + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir new file mode 100644 index 00000000..e19a122a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/fixtures/empty.txt b/packages/ipfs-unixfs-importer/test/fixtures/empty.txt new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt new file mode 100644 index 
00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv new file mode 100644 index 00000000..55e83f48 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt new file mode 100644 index 00000000..6e306c55 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt differ diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt new file mode 100644 index 00000000..d95023c7 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt @@ -0,0 +1,4 @@ +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/small.txt b/packages/ipfs-unixfs-importer/test/fixtures/small.txt new file mode 100644 index 00000000..f81fce04 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/small.txt @@ -0,0 +1 @@ +this is a file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt new file mode 100644 index 00000000..b3ab23d1 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt @@ -0,0 +1,20361 @@ +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). 
Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. 
+Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + + diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 new file mode 100644 index 00000000..f9810363 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 @@ -0,0 +1,4728 @@ + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. 
Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. 
+ + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. 
Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. 
+Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. 
Some \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv new file mode 100644 index 00000000..55e83f48 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv differ diff --git a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js new file mode 100644 index 00000000..94e44fda --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js @@ -0,0 +1,51 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const randomByteStream = require('./helpers/finite-pseudorandom-byte-stream') +const first = require('it-first') + +const strategies = [ + 'flat', + 'trickle', + 'balanced' +] + +const expectedHashes = { + flat: 'QmeJ9FRWKnXZQiX5CM1E8j4gpGbg6otpgajThqsbnBpoyD', + balanced: 'QmRdPboiJQoZ5cdazR9a8vGqdJvWg6M5bfdtUSKNHpuscj', + trickle: 'QmdZcefqMZ3tzdS4CRBN5s1c67eS3nQzN8TNXFBYfgofoy' +} + +strategies.forEach(strategy => { + const options = { + strategy: strategy + } + + describe('go-ipfs interop using importer:' + strategy, () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('yields the same tree as go-ipfs', async function () { + this.timeout(100 * 1000) + + const source = [{ + path: 'big.dat', + content: randomByteStream(45900000, 7382) + }] + + const file = await first(importer(source, ipld, options)) + + expect(file.cid.toBaseEncodedString()).to.be.equal(expectedHashes[strategy]) + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js b/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js new file mode 100644 index 00000000..4ef6a4e9 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js @@ -0,0 +1,20 @@ +'use strict' + +module.exports = function (cid, ipld) { + async function * traverse (cid) { + const node = await ipld.get(cid) + + if (Buffer.isBuffer(node) || !node.Links.length) { + yield { + node, + cid + } + + return + } + + node.Links.forEach(link => traverse(link.Hash)) + } + + return traverse(cid) +} diff --git a/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js b/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js new file mode 100644 index 00000000..3b07c734 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js @@ -0,0 +1,24 @@ +'use strict' + +const REPEATABLE_CHUNK_SIZE = 300000 + +module.exports = function * (maxSize, seed) { + const chunks = Math.ceil(maxSize / REPEATABLE_CHUNK_SIZE) + let emitted = 0 + const buf = Buffer.alloc(REPEATABLE_CHUNK_SIZE) + + while (emitted !== chunks) { + for (let i = 0; i < buf.length; i++) { + buf[i] = 256 & Math.floor(random(seed) * 256) + } + + yield buf + + emitted++ + } +} + +function random (seed) { + const x = Math.sin(seed) * 10000 + return x - Math.floor(x) +} diff --git a/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js b/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js new file mode 100644 index 00000000..776ae90f --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js @@ -0,0 +1,15 @@ +'use strict' + +module.exports = function * 
randomByteStream (seed) { + while (true) { + const r = Math.floor(random(seed) * 256) + seed = r + + yield Buffer.from([r]) + } +} + +function random (seed) { + const x = Math.sin(seed) * 10000 + return x - Math.floor(x) +} diff --git a/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js b/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js b/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js new file mode 100644 index 00000000..ae607121 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js @@ -0,0 +1,115 @@ +/* eslint-env mocha */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const all = require('it-all') +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +describe('import and export: directory', () => { + const rootHash = 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK' + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('imports', async function () { + this.timeout(20 * 1000) + + const source = [{ + path: 'a/b/c/d/e', + content: Buffer.from('banana') + }, { + path: 'a/b/c/d/f', + content: Buffer.from('strawberry') + }, { + path: 'a/b/g', + content: Buffer.from('ice') + }, { + path: 'a/b/h', + content: Buffer.from('cream') + }] + + const files = await all(importer(source, ipld)) + + expect(files.map(normalizeNode).sort(byPath)).to.be.eql([{ + path: 'a/b/h', + multihash: 'QmWHMpCtdNjemT2F3SjyrmnBXQXwEohaZd4apcbFBhbFRC' + }, { + path: 'a/b/g', + multihash: 'QmQGwYzzTPcbqTiy2Nbp88gqqBqCWY4QZGfen45LFZkD5n' + }, { + path: 'a/b/c/d/f', + multihash: 'QmNVHs2dy7AjGUotsubWVncRsD3SpRXm8MgmCCQTVdVACz' + }, { + path: 'a/b/c/d/e', + multihash: 'QmYPbDKwc7oneCcEc6BcRSN5GXthTGWUCd19bTCyP9u3vH' + }, { + path: 'a/b/c/d', + multihash: 'QmQGDXr3ysARM38n7h79Tx7yD3YxuzcnZ1naG71WMojPoj' + }, { + path: 'a/b/c', + multihash: 'QmYTVcjYpN3hQLtJstCPE8hhEacAYjWAuTmmAAXoonamuE' + }, { + path: 'a/b', + multihash: 'QmWyWYxq1GD9fEyckf5LrJv8hMW35CwfWwzDBp8bTw3NQj' + }, { + path: 'a', + multihash: rootHash + }]) + }) + + it('exports', async function () { + this.timeout(20 * 1000) + + const dir = await exporter(rootHash, ipld) + const files = await recursiveExport(dir, rootHash) + + expect(files.sort(byPath)).to.eql([{ + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/h', + content: 'cream' + }, { + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/g', + content: 'ice' + }, { + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/c/d/f', + content: 'strawberry' + }, { + path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/c/d/e', + content: 'banana' + }]) + }) +}) + +async function recursiveExport (node, path, entries = []) { + for await (const entry of node.content()) { + if (entry.unixfs.type === 'directory') { + await recursiveExport(entry, `${path}/${entry.name}`, entries) + } else { + entries.push({ + path: `${path}/${entry.name}`, + content: Buffer.concat(await all(entry.content())).toString() + }) + } + } + + return entries +} + +function normalizeNode (node) { + return { + path: node.path, + multihash: node.cid.toBaseEncodedString() + } +} + +function byPath (a, b) { + if (a.path > b.path) return -1 + if (a.path < b.path) return 1 + return 0 +} diff --git 
a/packages/ipfs-unixfs-importer/test/import-export.spec.js b/packages/ipfs-unixfs-importer/test/import-export.spec.js new file mode 100644 index 00000000..16a91eae --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/import-export.spec.js @@ -0,0 +1,50 @@ +/* eslint-env mocha */ +/* eslint max-nested-callbacks: ["error", 5] */ +'use strict' + +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const loadFixture = require('aegir/fixtures') +const isNode = require('detect-node') +const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt') + +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +const strategies = [ + 'flat', + 'balanced', + 'trickle' +] + +describe('import and export', function () { + this.timeout(30 * 1000) + + strategies.forEach((strategy) => { + const importerOptions = { strategy: strategy } + + describe('using builder: ' + strategy, () => { + let ipld + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('imports and exports', async () => { + const path = `${strategy}-big.dat` + const values = [{ path: path, content: bigFile }] + + for await (const file of importer(values, ipld, importerOptions)) { + expect(file.path).to.eql(path) + + const result = await exporter(file.cid, ipld) + + expect(result.unixfs.fileSize()).to.eql(bigFile.length) + } + }) + }) + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/importer.spec.js b/packages/ipfs-unixfs-importer/test/importer.spec.js new file mode 100644 index 00000000..0f5b6589 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/importer.spec.js @@ -0,0 +1,987 @@ +/* eslint-env mocha */ +'use strict' + +const importer = require('../src') +const exporter = require('ipfs-unixfs-exporter') + +const extend = require('deep-extend') +const chai = require('chai') +chai.use(require('dirty-chai')) +const expect = chai.expect +const spy = require('sinon/lib/sinon/spy') +const IPLD = require('ipld') +const inMemory = require('ipld-in-memory') +const UnixFs = require('ipfs-unixfs') +const collectLeafCids = require('./helpers/collect-leaf-cids') +const loadFixture = require('aegir/fixtures') +const isNode = require('detect-node') +const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt') +const smallFile = loadFixture((isNode ? 
__dirname : 'test') + '/fixtures/200Bytes.txt') +const all = require('it-all') +const first = require('it-first') + +function stringifyMh (files) { + return files.map((file) => { + return { + ...file, + cid: file.cid.toBaseEncodedString() + } + }) +} + +function dateToTimespec (date) { + const ms = date.getTime() + const secs = Math.floor(ms / 1000) + + return { + secs, + nsecs: (ms - (secs * 1000)) * 1000 + } +} + +const baseFiles = { + '200Bytes.txt': { + cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', + size: 200, + type: 'file', + path: '200Bytes.txt' + }, + '1.2MiB.txt': { + cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q', + size: 1258000, + type: 'file', + path: '1.2MiB.txt' + }, + 'small.txt': { + cid: 'QmZMb7HWpbevpcdhbUV1ZZgdji8vh5uQ13KxczChGrK9Rd', + size: 15, + type: 'file', + path: 'small.txt' + } +} + +const strategyBaseFiles = { + flat: baseFiles, + balanced: extend({}, baseFiles, { + '1.2MiB.txt': { + cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q', + type: 'file' + } + }), + trickle: extend({}, baseFiles, { + '200Bytes.txt': { + cid: 'QmY8bwnoKAKvJ8qtyPhWNxSS6sxiGVTJ9VpdQffs2KB5pE', + size: 200, + type: 'file', + path: '200Bytes.txt' + }, + '1.2MiB.txt': { + cid: 'QmfAxsHrpaLLuhbqqbo9KQyvQNawMnVSwutYoJed75pnco', + type: 'file' + } + }) +} + +const strategies = [ + 'flat', + 'balanced', + 'trickle' +] + +const strategyOverrides = { + balanced: { + 'foo-big': { + cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj', + path: 'foo-big', + size: 1335478, + type: 'directory' + }, + pim: { + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + path: 'pim', + size: 1335744, + type: 'directory' + }, + 'pam/pum': { + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + path: 'pam/pum', + size: 1335744, + type: 'directory' + }, + pam: { + cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av', + path: 'pam', + size: 2671269, + type: 'directory' + } + }, + trickle: { + 'foo-big': { + cid: 'QmaKbhFRy9kcCbcwrLsqYHWMiY44BDYkqTCMpAxDdd2du2', + path: 'foo-big', + size: 1334657, + type: 'directory' + }, + pim: { + cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt', + path: 'pim', + size: 1334923, + type: 'directory' + }, + 'pam/pum': { + cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt', + path: 'pam/pum', + size: 1334923, + type: 'directory' + }, + pam: { + cid: 'QmSuh47G9Qm3PFv1zziojtHxqCjuurSdtWAzxLxoKJPq2U', + path: 'pam', + size: 2669627, + type: 'directory' + }, + '200Bytes.txt with raw leaves': { + cid: 'QmagyRwMfYhczYNv5SvcJc8xxXjZQBTTHS2jEqNMva2mYT', + size: 200, + path: '200Bytes.txt', + type: 'file' + }, + 'foo/bar': { + cid: 'QmTGMxKPzSGNBDp6jhTwnZxGW6w1S9ciyycRJ4b2qcQaHK', + size: 0, + path: 'foo/bar', + type: 'directory' + }, + foo: { + cid: 'Qme4A8fZmwfZESappfPcxSMTZVACiEzhHKtYRMuM1hbkDp', + size: 0, + path: 'foo', + type: 'directory' + }, + 'small.txt': { + cid: 'QmXmZ3qT328JxWtQXqrmvma2FmPp7tMdNiSuYvVJ5QRhKs', + size: 15, + type: 'file', + path: 'small.txt' + } + } +} + +const checkLeafNodeTypes = async (ipld, options, expected) => { + const file = await first(importer([{ + path: 'foo', + content: Buffer.alloc(262144 + 5).fill(1) + }], ipld, options)) + + const node = await ipld.get(file.cid) + const meta = UnixFs.unmarshal(node.Data) + + expect(meta.type).to.equal('file') + expect(node.Links.length).to.equal(2) + + const linkedNodes = await Promise.all( + node.Links.map(link => ipld.get(link.Hash)) + ) + + linkedNodes.forEach(node => { + const meta = UnixFs.unmarshal(node.Data) + expect(meta.type).to.equal(expected) + }) +} + +const 
checkNodeLinks = async (ipld, options, expected) => { + for await (const file of importer([{ + path: 'foo', + content: Buffer.alloc(100).fill(1) + }], ipld, options)) { + const node = await ipld.get(file.cid) + const meta = UnixFs.unmarshal(node.Data) + + expect(meta.type).to.equal('file') + expect(node.Links.length).to.equal(expected) + } +} + +strategies.forEach((strategy) => { + const baseFiles = strategyBaseFiles[strategy] + const defaultResults = extend({}, baseFiles, { + 'foo/bar/200Bytes.txt': extend({}, baseFiles['200Bytes.txt'], { + path: 'foo/bar/200Bytes.txt' + }), + foo: { + path: 'foo', + cid: 'QmQrb6KKWGo8w7zKfx2JksptY6wN7B2ysSBdKZr4xMU36d', + size: 320, + type: 'directory' + }, + 'foo/bar': { + path: 'foo/bar', + cid: 'Qmf5BQbTUyUAvd6Ewct83GYGnE1F6btiC3acLhR8MDxgkD', + size: 270, + type: 'directory' + }, + 'foo-big/1.2MiB.txt': extend({}, baseFiles['1.2MiB.txt'], { + path: 'foo-big/1.2MiB.txt' + }), + 'foo-big': { + path: 'foo-big', + cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj', + size: 1328120, + type: 'directory' + }, + 'pim/200Bytes.txt': extend({}, baseFiles['200Bytes.txt'], { + path: 'pim/200Bytes.txt' + }), + 'pim/1.2MiB.txt': extend({}, baseFiles['1.2MiB.txt'], { + path: 'pim/1.2MiB.txt' + }), + pim: { + path: 'pim', + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + size: 1328386, + type: 'directory' + }, + 'empty-dir': { + path: 'empty-dir', + cid: 'QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn', + size: 4, + type: 'directory' + }, + 'pam/pum': { + cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i', + path: 'pam/pum', + size: 1328386, + type: 'directory' + }, + pam: { + cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av', + path: 'pam', + size: 2656553, + type: 'directory' + }, + '200Bytes.txt with raw leaves': extend({}, baseFiles['200Bytes.txt'], { + cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8', + size: 200 + }) + }, strategyOverrides[strategy]) + + const expected = extend({}, defaultResults, strategies[strategy]) + + const expectFiles = (actualFiles, expectedFiles) => { + expect(actualFiles.length).to.equal(expectedFiles.length) + + for (let i = 0; i < expectedFiles.length; i++) { + const expectedFile = expected[expectedFiles[i]] + const actualFile = actualFiles[i] + + expect(actualFile.path).to.equal(expectedFile.path) + expect(actualFile.cid.toBaseEncodedString('base58btc')).to.equal(expectedFile.cid) + + if (actualFile.unixfs) { + expect(actualFile.unixfs.type).to.equal(expectedFile.type) + + if (actualFile.unixfs.type === 'file') { + expect(actualFile.unixfs.fileSize()).to.equal(expectedFile.size) + } + } + } + } + + describe('importer: ' + strategy, function () { + this.timeout(30 * 1000) + + let ipld + const options = { + strategy: strategy + } + + before(async () => { + ipld = await inMemory(IPLD) + }) + + it('fails on bad content', async () => { + try { + await all(importer([{ + path: '200Bytes.txt', + content: 7 + }], ipld, options)) + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_CONTENT') + } + }) + + it('fails on an iterator that yields bad content', async () => { + try { + await all(importer([{ + path: '200Bytes.txt', + content: { + [Symbol.iterator]: function * () { + yield 7 + } + } + }], ipld, options)) + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_INVALID_CONTENT') + } + }) + + it('doesn\'t yield anything on empty source', async () => { + const files = await all(importer([], ipld, options)) + + 
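+      // an empty source should yield no importer entries at all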
expect(files).to.be.empty() + }) + + it('doesn\'t yield anything on empty file', async () => { + const files = await all(importer([{ + path: 'emptyfile', + content: Buffer.alloc(0) + }], ipld, options)) + + expect(files.length).to.eql(1) + + // always yield empty file node + expect(files[0].cid.toBaseEncodedString()).to.eql('QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH') + }) + + it('fails on more than one root', async () => { + try { + await all(importer([{ + path: 'beep/200Bytes.txt', + content: smallFile + }, { + path: 'boop/200Bytes.txt', + content: bigFile + }], ipld, options)) + + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_MORE_THAN_ONE_ROOT') + } + }) + + it('accepts strings as content', async () => { + const content = 'I am a string' + const res = await all(importer([{ + path: '200Bytes.txt', + content + }], ipld, options)) + + const file = await exporter(res[0].cid, ipld) + const fileContent = await all(file.content()) + + expect(fileContent.toString()).to.equal(content) + }) + + it('small file with an escaped slash in the title', async () => { + const filePath = `small-\\/file-${Math.random()}.txt` + const files = await all(importer([{ + path: filePath, + content: smallFile + }], ipld, options)) + + expect(files.length).to.equal(1) + expect(files[0].path).to.equal(filePath) + }) + + it('small file with square brackets in the title', async () => { + const filePath = `small-[v]-file-${Math.random()}.txt` + const files = await all(importer([{ + path: filePath, + content: smallFile + }], ipld, options)) + + expect(files.length).to.equal(1) + expect(files[0].path).to.equal(filePath) + }) + + it('small file as buffer (smaller than a chunk)', async () => { + const files = await all(importer([{ + path: '200Bytes.txt', + content: smallFile + }], ipld, options)) + + expectFiles(files, [ + '200Bytes.txt' + ]) + }) + + it('small file as array (smaller than a chunk)', async () => { + const files = await all(importer([{ + path: '200Bytes.txt', + content: Array.from(smallFile) + }], ipld, options)) + + expectFiles(files, [ + '200Bytes.txt' + ]) + }) + + it('small file as string (smaller than a chunk)', async () => { + const files = await all(importer([{ + path: 'small.txt', + content: 'this is a file\n' + }], ipld, options)) + + expectFiles(files, [ + 'small.txt' + ]) + }) + + it('small file (smaller than a chunk) with raw leaves', async () => { + const files = await all(importer([{ + path: '200Bytes.txt', + content: smallFile + }], ipld, { + ...options, + rawLeaves: true + })) + + expectFiles(files, [ + '200Bytes.txt with raw leaves' + ]) + }) + + it('small file (smaller than a chunk) inside a dir', async () => { + const files = await all(importer([{ + path: 'foo/bar/200Bytes.txt', + content: smallFile + }], ipld, options)) + + expectFiles(files, [ + 'foo/bar/200Bytes.txt', + 'foo/bar', + 'foo' + ]) + }) + + it('file bigger than a single chunk', async () => { + this.timeout(60 * 1000) + + const files = await all(importer([{ + path: '1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expectFiles(files, [ + '1.2MiB.txt' + ]) + }) + + it('file bigger than a single chunk inside a dir', async () => { + this.timeout(60 * 1000) + + const files = await all(importer([{ + path: 'foo-big/1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expectFiles(files, [ + 'foo-big/1.2MiB.txt', + 'foo-big' + ]) + }) + + it('empty directory', async () => { + const files = await all(importer([{ + path: 'empty-dir' + }], ipld, options)) + + 
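+      // a path given with no content is imported as a directory node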
expectFiles(files, [ + 'empty-dir' + ]) + }) + + it('directory with files', async () => { + const files = await all(importer([{ + path: 'pim/200Bytes.txt', + content: smallFile + }, { + path: 'pim/1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expectFiles(files, [ + 'pim/200Bytes.txt', + 'pim/1.2MiB.txt', + 'pim' + ]) + }) + + it('nested directory (2 levels deep)', async () => { + const files = await all(importer([{ + path: 'pam/pum/200Bytes.txt', + content: smallFile + }, { + path: 'pam/pum/1.2MiB.txt', + content: bigFile + }, { + path: 'pam/1.2MiB.txt', + content: bigFile + }], ipld, options)) + + const result = stringifyMh(files) + + expect(result.length).to.equal(5) + + result.forEach(eachFile) + + function eachFile (file) { + if (file.path === 'pam/pum/200Bytes.txt') { + expect(file.cid).to.equal(expected['200Bytes.txt'].cid) + expect(file.unixfs.fileSize()).to.equal(expected['200Bytes.txt'].size) + } else if (file.path === 'pam/pum/1.2MiB.txt') { + expect(file.cid).to.equal(expected['1.2MiB.txt'].cid) + expect(file.unixfs.fileSize()).to.equal(expected['1.2MiB.txt'].size) + } else if (file.path === 'pam/pum') { + expect(file.cid).to.equal(expected['pam/pum'].cid) + } else if (file.path === 'pam/1.2MiB.txt') { + expect(file.cid).to.equal(expected['1.2MiB.txt'].cid) + expect(file.unixfs.fileSize()).to.equal(expected['1.2MiB.txt'].size) + } else if (file.path === 'pam') { + expect(file.cid).to.equal(expected.pam.cid) + } else { + throw new Error(`Unexpected path ${file.path}`) + } + } + }) + + it('will not write to disk if passed "onlyHash" option', async () => { + const content = String(Math.random() + Date.now()) + const files = await all(importer([{ + path: content + '.txt', + content: Buffer.from(content) + }], ipld, { + onlyHash: true + })) + + const file = files[0] + expect(file).to.exist() + + try { + await ipld.get(file.cid) + + throw new Error('No error was thrown') + } catch (err) { + expect(err.code).to.equal('ERR_NOT_FOUND') + } + }) + + it('will call an optional progress function', async () => { + const maxChunkSize = 2048 + + const options = { + progress: spy(), + maxChunkSize + } + + await all(importer([{ + path: '1.2MiB.txt', + content: bigFile + }], ipld, options)) + + expect(options.progress.called).to.equal(true) + expect(options.progress.args[0][0]).to.equal(maxChunkSize) + }) + + it('will import files with CID version 1', async () => { + const createInputFile = (path, size) => { + const name = String(Math.random() + Date.now()) + path = path[path.length - 1] === '/' ? 
path : path + '/' + return { + path: path + name + '.txt', + content: Buffer.alloc(size).fill(1) + } + } + + const inputFiles = [ + createInputFile('/foo', 10), + createInputFile('/foo', 60), + createInputFile('/foo/bar', 78), + createInputFile('/foo/baz', 200), + // Bigger than maxChunkSize + createInputFile('/foo', 262144 + 45), + createInputFile('/foo/bar', 262144 + 134), + createInputFile('/foo/bar', 262144 + 79), + createInputFile('/foo/bar', 262144 + 876), + createInputFile('/foo/bar', 262144 + 21) + ] + + const options = { + cidVersion: 1, + // Ensures we use DirSharded for the data below + shardSplitThreshold: 3 + } + + // Pass a copy of inputFiles, since the importer mutates them + const files = await all(importer(inputFiles.map(f => Object.assign({}, f)), ipld, options)) + + const file = files[0] + expect(file).to.exist() + + for (let i = 0; i < file.length; i++) { + const file = files[i] + + const cid = file.cid.toV1() + const inputFile = inputFiles.find(f => f.path === file.path) + + // Just check the intermediate directory can be retrieved + if (!inputFile) { + await ipld.get(cid) + } + + // Check the imported content is correct + const node = await exporter(cid, ipld) + const chunks = [] + + for await (const chunk of node.content()) { + chunks.push(chunk) + } + + expect(Buffer.concat(chunks)).to.deep.equal(inputFile.content) + } + }) + + it('imports file with raw leaf nodes when specified', () => { + return checkLeafNodeTypes(ipld, { + leafType: 'raw' + }, 'raw') + }) + + it('imports file with file leaf nodes when specified', () => { + return checkLeafNodeTypes(ipld, { + leafType: 'file' + }, 'file') + }) + + it('reduces file to single node when specified', () => { + return checkNodeLinks(ipld, { + reduceSingleLeafToSelf: true + }, 0) + }) + + it('does not reduce file to single node when overidden by options', () => { + return checkNodeLinks(ipld, { + reduceSingleLeafToSelf: false + }, 1) + }) + + it('uses raw leaf nodes when requested', async () => { + this.timeout(60 * 1000) + + const options = { + rawLeaves: true + } + + for await (const file of importer([{ + path: '1.2MiB.txt', + content: bigFile + }], ipld, options)) { + for await (const { cid } of collectLeafCids(file.cid, ipld)) { + expect(cid.codec).to.be('raw') + expect(cid.version).to.be(1) + } + } + }) + + it('supports passing mtime', async () => { + this.timeout(60 * 1000) + + const options = { + rawLeaves: true + } + const now = new Date() + + for await (const file of importer([{ + path: '1.2MiB.txt', + content: bigFile, + mtime: now + }], ipld, options)) { + const node = await exporter(file.cid, ipld) + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + } + }) + + it('supports passing mtime for directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + + const entries = await all(importer([{ + path: '/foo', + mtime: now + }], ipld)) + + const node = await exporter(entries[0].cid, ipld) + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + }) + + it('supports passing metadata for wrapping directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + const perms = 0o0777 + + const entries = await all(importer([{ + path: '/foo', + mtime: now, + mode: perms + }, { + path: '/foo/bar.txt', + content: bigFile + }], ipld)) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'directory').pop() + + if (!node) { + 
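+        // bail out if the exported tree contains no directory node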
expect.fail('no directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + expect(node).to.have.nested.property('unixfs.mode', perms) + }) + + it('supports passing metadata for intermediate directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + const perms = 0o0777 + + const entries = await all(importer([{ + path: '/foo/bar', + mtime: now, + mode: perms + }, { + path: '/foo/bar/baz.txt', + content: bigFile + }], ipld)) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'directory').pop() + + if (!node) { + expect.fail('no directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + expect(node).to.have.nested.property('unixfs.mode', perms) + }) + + it('supports passing metadata for out of order intermediate directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + const perms = 0o0777 + + const entries = await all(importer([{ + path: '/foo/bar/qux.txt', + content: bigFile + }, { + path: '/foo/bar', + mtime: now, + mode: perms + }, { + path: '/foo/quux' + }, { + path: '/foo/bar/baz.txt', + content: bigFile + }], ipld)) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'directory' && node.name === 'bar').pop() + + if (!node) { + expect.fail('no directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + expect(node).to.have.nested.property('unixfs.mode', perms) + }) + + it('supports passing mtime for hamt-sharded-directories', async () => { + this.timeout(60 * 1000) + + const now = new Date() + + const entries = await all(importer([{ + path: '/foo', + mtime: now + }, { + path: '/foo/bar.txt', + content: bigFile + }, { + path: '/foo/baz.txt', + content: bigFile + }, { + path: '/foo/qux' + }], ipld, { + shardSplitThreshold: 0 + })) + + const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld)) + const node = nodes.filter(node => node.unixfs.type === 'hamt-sharded-directory').pop() + + if (!node) { + expect.fail('no hamt-sharded-directory found') + } + + expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now)) + }) + + it('supports passing mode', async () => { + this.timeout(60 * 1000) + + const options = { + rawLeaves: true + } + const mode = 0o0111 + + for await (const file of importer([{ + path: '1.2MiB.txt', + content: bigFile, + mode + }], ipld, options)) { + const node = await exporter(file.cid, ipld) + + expect(node).to.have.nested.property('unixfs.mode', mode) + } + }) + + it('supports passing mode for directories', async () => { + this.timeout(60 * 1000) + + const mode = 0o0111 + + const entries = await all(importer([{ + path: '/foo', + mode + }], ipld)) + + const node = await exporter(entries[0].cid, ipld) + expect(node).to.have.nested.property('unixfs.mode', mode) + }) + + it('supports passing different modes for different files', async () => { + this.timeout(60 * 1000) + + const mode1 = 0o0111 + const mode2 = 0o0222 + + const entries = await all(importer([{ + path: '/foo/file1.txt', + content: bigFile, + mode: mode1 + }, { + path: '/foo/file2.txt', + content: bigFile, + mode: mode2 + }], ipld)) + + const node1 = await exporter(entries[0].cid, ipld) + expect(node1).to.have.nested.property('unixfs.mode', mode1) + + const node2 = await exporter(entries[1].cid, ipld) 
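+      // each file keeps the mode it was imported with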
+ expect(node2).to.have.nested.property('unixfs.mode', mode2) + }) + + it('supports deeply nested files do not inherit custom metadata', async () => { + this.timeout(60 * 1000) + + const mode = 0o0111 + + const entries = await all(importer([{ + path: '/foo/file1.txt', + content: bigFile, + mode: mode + }, { + path: '/foo/bar/baz/file2.txt', + content: bigFile + }], ipld)) + + const node1 = await exporter(entries[0].cid, ipld) + expect(node1).to.have.nested.property('unixfs.mode', mode) + + const node2 = await exporter(entries[1].cid, ipld) + expect(node2).to.have.nested.property('unixfs.mode').that.does.not.equal(mode) + }) + + it('files and directories get default mode if not specified', async () => { + this.timeout(60 * 1000) + + const entries = await all(importer([{ + path: '/foo/file1.txt', + content: bigFile + }], ipld)) + + const node1 = await exporter(entries[0].cid, ipld) + expect(node1).to.have.nested.property('unixfs.mode', 0o0644) + + const node2 = await exporter(entries[1].cid, ipld) + expect(node2).to.have.nested.property('unixfs.mode', 0o0755) + }) + }) +}) + +describe('configuration', () => { + it('alllows configuring with custom dag and tree builder', async () => { + let builtTree = false + const ipld = 'ipld' + const entries = await all(importer([{ + path: 'path', + content: 'content' + }], ipld, { + dagBuilder: async function * (source, ipld, opts) { // eslint-disable-line require-await + yield function () { + return Promise.resolve({ + cid: 'cid', + path: 'path', + unixfs: 'unixfs' + }) + } + }, + treeBuilder: async function * (source, ipld, opts) { // eslint-disable-line require-await + builtTree = true + yield * source + } + })) + + expect(entries).to.have.lengthOf(1) + expect(entries).to.have.nested.property('[0].cid', 'cid') + expect(entries).to.have.nested.property('[0].path', 'path') + expect(entries).to.have.nested.property('[0].unixfs', 'unixfs') + + expect(builtTree).to.be.true() + }) + + it('alllows configuring with custom chunker', async () => { + let validated = false + let chunked = false + const ipld = { + put: () => 'cid' + } + const entries = await all(importer([{ + path: 'path', + content: 'content' + }], ipld, { + chunkValidator: async function * (source, opts) { // eslint-disable-line require-await + validated = true + yield * source + }, + chunker: async function * (source, opts) { // eslint-disable-line require-await + chunked = true + yield * source + } + })) + + expect(entries).to.have.lengthOf(1) + expect(entries).to.have.nested.property('[0].cid', 'cid') + expect(entries).to.have.nested.property('[0].path', 'path') + expect(entries).to.have.nested.property('[0].unixfs') + + expect(validated).to.be.true() + expect(chunked).to.be.true() + }) +}) diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data new file mode 100644 index 00000000..70641f44 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data @@ -0,0 +1,5 @@ + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data 
new file mode 100644 index 00000000..41456196 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data @@ -0,0 +1,4 @@ + +ys# js-ipfs-repo +Implementation of the IPFS repo spec (https://github.com/ipfs/specs/tree/master/repo) in JavaScript +s \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data new file mode 100644 index 00000000..ce734230 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data @@ -0,0 +1,4 @@ +4 +" si"¹W` +- browsers or extensions can learn to use `ipfs://` directly +- hash-addressed content guarantees authenticity + +IPFS is modular: +- connection layer over any network protocol +- routing layer +- uses a routing layer DHT (kademlia/coral) +- uses a path-based naming service +- uses bittorrent-inspired block exchange + +IPFS uses crypto: +- cryptographic-hash content addressing +- block-level deduplication +- file integrity + versioning +- filesystem-level encryption + signing support + +IPFS is p2p: +- worldwide peer-to-peer file transfers +- completely decentralized architecture +- **no** central point of failure + +IPFS is a cdn: +- add a file to the filesystem locally, and it's now available to the world +- caching-friendly (content-hash naming) +- bittorrent-based bandwidth distribution + +IPFS has a name service: +- IPNS, an SFS inspired name system +- global namespace based on PKI +- serves to build trust chains +- compatible with other NSes +- can map DNS, .onion, .bit, etc to IPNS + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data new file mode 100644 index 00000000..42c502e2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data new file mode 100644 index 00000000..46fecabf --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data @@ -0,0 +1,2 @@ + +Q \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data new file mode 100644 index 00000000..1379fd9c Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data new file mode 100644 index 00000000..ee87b15f --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data @@ -0,0 +1,1452 @@ + +l systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. 
+ + + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data new file mode 100644 index 00000000..6860441a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data @@ -0,0 +1,3 @@ +/ +" gq6\u8~:6~gZ.directT2 +" 6(%݄.Ӿ5(ab recursiveT \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data new file mode 100644 index 00000000..f57749f0 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data new file mode 100644 index 00000000..6a0cbe82 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data @@ -0,0 +1,3 @@ + + +Q diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data new file mode 100644 index 00000000..74de75af Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data new file mode 100644 index 00000000..f4c039c2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data new file mode 100644 index 00000000..8eb2a515 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data new file mode 100644 index 00000000..a9c1c069 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data new file mode 100644 index 00000000..1067edb4 --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data @@ -0,0 +1,6 @@ + + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data new file mode 100644 index 00000000..4741988d --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data @@ -0,0 +1,4729 @@ + +e academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. 
+Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput oriented file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
\ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data new file mode 100644 index 00000000..df20559d Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data new file mode 100644 index 00000000..ecce1053 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data @@ -0,0 +1,4 @@ +5 +" ׾F_uؔlzS?|ڲPc@ js-ipfs-repo + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data new file mode 100644 index 00000000..96566028 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data @@ -0,0 +1,6 @@ + +5 +" $G,A4{xZ/.D` 200Bytes.txt3 +" Y9_)a˹2RmŖke9 dir-another0 +" Ty5 ;_9YfqFLhyl/level-1 + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data new file mode 100644 index 00000000..fa45ee79 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data new file mode 100644 index 00000000..bbe6bda7 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data new file mode 100644 index 00000000..b99ceb21 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data @@ -0,0 +1,3 @@ +, +" `u>/2l ilfYB'M%bar + \ No newline at end of file
--git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data new file mode 100644 index 00000000..be380799 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data @@ -0,0 +1,4730 @@

There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
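The content-addressed Merkle DAG mentioned above is the idea that carries over most directly into IPFS: a node's identifier is the hash of its serialized bytes, and links reference other nodes by that hash, so identical content always maps to the same identifier. The snippet below is only a minimal sketch of that idea, not the IPFS or dag-pb implementation; the putNode/getNode helpers, the in-memory Map blockstore, and the JSON encoding are assumptions made purely for illustration (real IPFS uses CIDs, multihashes, and protobuf-encoded nodes).

'use strict'

// Minimal content-addressing sketch (illustration only, not the IPFS wire format).
// A node is identified by the SHA-256 of its serialized bytes; links hold those ids.
const crypto = require('crypto')

const blockstore = new Map() // id -> serialized node bytes

function putNode (node) {
  const bytes = Buffer.from(JSON.stringify(node)) // real IPFS encodes dag-pb, not JSON
  const id = crypto.createHash('sha256').update(bytes).digest('hex')
  blockstore.set(id, bytes)
  return id
}

function getNode (id) {
  const bytes = blockstore.get(id)
  return bytes ? JSON.parse(bytes.toString()) : undefined
}

// Leaves hold data; the parent links to them by hash, forming a Merkle DAG.
const leafA = putNode({ data: 'hello ' })
const leafB = putNode({ data: 'world' })
const root = putNode({ links: [leafA, leafB] })

console.log(root)                // deterministic: same content -> same id
console.log(getNode(root).links) // [leafA, leafB]

Because the root id commits to every leaf hash, any peer can hand back the blocks and the receiver can verify them locally, which is the property that makes content-addressed file distribution attractive.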
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
Outside of academia, the most successfu \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data new file mode 100644 index 00000000..508cff2e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data new file mode 100644 index 00000000..0b520379 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data new file mode 100644 index 00000000..e705b9b0 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data new file mode 100644 index 00000000..725a9b22 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data @@ -0,0 +1,5 @@ + +@:4 +" si"¹W\IzxEElM/fLICENSE1 +" JZXoRX!Fwd87U;SöWw README.md{ + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data new file mode 100644 index 00000000..5ea0edda Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data new file mode 100644 index 00000000..e845c839 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data new file mode 100644 index 00000000..4eb5d7bf --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data @@ -0,0 +1,4 @@ + +A;5 +" $G,A4{xZ/.D` 200Bytes.txt +; \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data new file mode 100644 index 00000000..a762644a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data new file mode 100644 index 00000000..8e5a1d76 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data new file mode 100644 index 00000000..5b090964 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data new file mode 100644 index 00000000..f9810363 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data @@ -0,0 +1,4728 @@
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data new file mode 100644 index 00000000..62d1c297 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data @@ -0,0 +1,8 @@ + +Come hang out in our IRC chat room if you have any questions. + +Contact the ipfs dev team: +- Bugs: https://github.com/ipfs/go-ipfs/issues +- Help: irc.freenode.org/#ipfs +- Email: dev@ipfs.io + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data new file mode 100644 index 00000000..00360cfb --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data @@ -0,0 +1,3 @@ +4 +" UFrnb⇾?|< test-data + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data new file mode 100644 index 00000000..026ac913 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data new file mode 100644 index 00000000..7c40850f Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data new file mode 100644 index 00000000..912b64e0 Binary files /dev/null and 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data new file mode 100644 index 00000000..9f1e7af6 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data new file mode 100644 index 00000000..dcd69d0b Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data new file mode 100644 index 00000000..71be805f --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data @@ -0,0 +1,9 @@ + +Some helpful resources for finding your way around ipfs: + +- quick-start: a quick show of various ipfs features. +- ipfs commands: a list of all commands +- ipfs --help: every command describes itself +- https://github.com/ipfs/go-ipfs -- the src repository +- #ipfs on irc.freenode.org -- the community irc channel + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data new file mode 100644 index 00000000..aacafb9f Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data new file mode 100644 index 00000000..ca141be2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data new file mode 100644 index 00000000..69e8f9e4 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data new file mode 100644 index 00000000..637f391c --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data @@ -0,0 +1,2 @@ + +x\΃ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data new file mode 100644 index 00000000..44403205 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data @@ -0,0 +1,3 @@ + + +x\΃ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data new file mode 100644 index 00000000..cbd601a6 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data new file mode 100644 index 00000000..7b58d6c8 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data @@ -0,0 +1,3 @@ +/ +" @ԆDgA7directT2 +" ;APY0k}E=p  recursiveT \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data new file mode 100644 index 00000000..46d10573 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data new file mode 100644 index 00000000..3f5311b7 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data new file mode 100644 index 00000000..f0b3a599 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data new file mode 100644 index 00000000..a3e60c9e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data new file mode 100644 index 00000000..bb713c56 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data new file mode 100644 index 00000000..5accb645 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data @@ -0,0 +1,3 @@ +5 +" $G,A4{xZ/.D` 200Bytes.txt + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data new file mode 100644 index 00000000..c3a2f685 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data new file mode 100644 index 00000000..a655cf83 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data @@ -0,0 +1,5 @@ + +wxxM{ +DzH/&^ RS/v,R +=Ng~pf1\[>%U1@Q׀2&m6qQ؁]|!K E~J ֕읝ojbn3eT)D+;s +컓:Ty!c3\*T7E?[Pv}A+cx~e \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data new file mode 100644 index 00000000..a8f98693 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data new file mode 100644 index 00000000..6d043733 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data @@ -0,0 +1,2 @@ + +rː'Q# \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data new file mode 100644 index 00000000..1524efce Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data new file mode 100644 index 00000000..e69de29b diff --git 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data new file mode 100644 index 00000000..389e1117 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data @@ -0,0 +1,28 @@ + +Hello and Welcome to IPFS! + +██╗██████╗ ███████╗███████╗ +██║██╔══██╗██╔════╝██╔════╝ +██║██████╔╝█████╗ ███████╗ +██║██╔═══╝ ██╔══╝ ╚════██║ +██║██║ ██║ ███████║ +╚═╝╚═╝ ╚═╝ ╚══════╝ + +If you're seeing this, you have successfully installed +IPFS and are now interfacing with the ipfs merkledag! + + ------------------------------------------------------- +| Warning: | +| This is alpha software. Use at your own discretion! | +| Much is missing or lacking polish. There are bugs. | +| Not yet secure. Read the security notes for more. | + ------------------------------------------------------- + +Check out some of the other files in this directory: + + ./about + ./help + ./quick-start <-- usage examples + ./readme <-- this file + ./security-notes + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data new file mode 100644 index 00000000..5a59204a --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data @@ -0,0 +1,2 @@ + +stem. Some \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data new file mode 100644 index 00000000..1a86e0be Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data new file mode 100644 index 00000000..74f62a02 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data @@ -0,0 +1,3 @@ + + +'Q# diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data new file mode 100644 index 00000000..3a99c365 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data @@ -0,0 +1,3 @@ +4 +" Y9_)a˹2RmŖke9 js-ipfs-repo + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING new file mode 100644 index 00000000..a153331d --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING @@ -0,0 +1 @@ +/repo/flatfs/shard/v1/next-to-last/2 diff --git 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data new file mode 100644 index 00000000..38a7ed3a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data new file mode 100644 index 00000000..562529a2 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data new file mode 100644 index 00000000..dedf499f Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data new file mode 100644 index 00000000..9e5174d0 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data @@ -0,0 +1,4 @@ +5 +" $G,A4{xZ/.D` 200Bytes.txt/ +" Y9_)a˹2RmŖke9level-2 + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data new file mode 100644 index 00000000..5a3836e9 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data new file mode 100644 index 00000000..a4027d46 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data new file mode 100644 index 00000000..10aa2ae4 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data new file mode 100644 index 00000000..c1f9899a Binary files /dev/null and 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data new file mode 100644 index 00000000..4e910622 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data new file mode 100644 index 00000000..871a6bf0 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data @@ -0,0 +1,4729 @@ + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.

¹For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume, high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
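To make the content-addressed Merkle DAG idea concrete, here is a minimal, illustrative sketch in Node.js. It is not IPFS's actual object format: real IPFS objects use multihashes, CIDs, and dag-pb/UnixFS encodings, whereas the `hashBlock` and `createNode` helpers below are hypothetical and use plain SHA-256 hex digests over a JSON serialization, purely to show how linking children by content hash makes a parent node commit to its entire subgraph.

```js
'use strict'

// Conceptual sketch only: content addressing and Merkle-DAG linking
// with Node.js built-ins. The helpers and the JSON/hex encoding are
// simplifications for illustration, not the IPFS wire format.
const crypto = require('crypto')

// Address a block of bytes by the hash of its content.
function hashBlock (bytes) {
  return crypto.createHash('sha256').update(bytes).digest('hex')
}

// A DAG node carries opaque data plus links to children by hash.
// Because each link is a hash of the child's serialized form, the
// parent's own hash commits to the whole subgraph beneath it.
function createNode (data, links = []) {
  const node = { data, links }
  const serialized = Buffer.from(JSON.stringify(node))
  return { hash: hashBlock(serialized), node, serialized }
}

// Two leaf blocks...
const leafA = createNode('hello ')
const leafB = createNode('world\n')

// ...and a parent "file" node that references them by content hash.
const file = createNode(null, [leafA.hash, leafB.hash])

console.log(file.hash) // changes if any byte in either leaf changes
```

Because every link is a hash of the linked object's bytes, identical subtrees deduplicate automatically and any mutation yields a new root hash, which is the property the file distribution strategies discussed above rely on.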
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. 
Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. 
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. 
But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. While there have been successful repurposings1 +, no general file-system has emerged that offers global, +low-latency, and decentralized distribution. +Perhaps this is because a “good enough” system for most +use cases already exists: HTTP. By far, HTTP is the most +successful “distributed system of files” ever deployed. Coupled +with the browser, HTTP has had enormous technical +and social impact. It has become the de facto way to transmit +files across the internet. Yet, it fails to take advantage +of dozens of brilliant file distribution techniques invented in +the last fifteen years. 
From one prespective, evolving Web +infrastructure is near-impossible, given the number of backwards +compatibility constraints and the number of strong +1For example, Linux distributions use BitTorrent to transmit +disk images, and Blizzard, Inc. uses it to distribute +video game content. +parties invested in the current model. But from another perspective, +new protocols have emerged and gained wide use +since the emergence of HTTP. What is lacking is upgrading +design: enhancing the current HTTP web, and introducing +new functionality without degrading user experience. +Industry has gotten away with using HTTP this long because +moving small files around is relatively cheap, even for +small organizations with lots of traffic. But we are entering +a new era of data distribution with new challenges: (a) +hosting and distributing petabyte datasets, (b) computing +on large data across organizations, (c) high-volume highdefinition +on-demand or real-time media streams, (d) versioning +and linking of massive datasets, (e) preventing accidental +disappearance of important files, and more. Many +of these can be boiled down to “lots of data, accessible everywhere.” +Pressed by critical features and bandwidth concerns, +we have already given up HTTP for different data +distribution protocols. The next step is making them part +of the Web itself. +Orthogonal to efficient data distribution, version control +systems have managed to develop important data collaboration +workflows. Git, the distributed source code version +control system, developed many useful ways to model and +implement distributed data operations. The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. + + +There have been many attempts at constructing a global +distributed file system. Some systems have seen signifi- +cant success, and others failed completely. Among the academic +attempts, AFS [6] has succeeded widely and is still +in use today. Others [7, ?] have not attained the same +success. Outside of academia, the most successful systems +have been peer-to-peer file-sharing applications primarily +geared toward large media (audio and video). Most notably, +Napster, KaZaA, and BitTorrent [2] deployed large +file distribution systems supporting over 100 million simultaneous +users. Even today, BitTorrent maintains a massive +deployment where tens of millions of nodes churn daily [16]. +These applications saw greater numbers of users and files distributed +than their academic file system counterparts. However, +the applications were not designed as infrastructure to +be built upon. 
The Git toolchain +offers versatile versioning functionality that large file distribution +systems severely lack. New solutions inspired by Git +are emerging, such as Camlistore [?], a personal file storage +system, and Dat [?] a data collaboration toolchain +and dataset package manager. Git has already influenced +distributed filesystem design [9], as its content addressed +Merkle DAG data model enables powerful file distribution +strategies. What remains to be explored is how this data +structure can influence the design of high-throughput oriented +file systems, and how it might upgrade the Web itself. +This paper introduces IPFS, a novel peer-to-peer versioncontrolled +filesystem seeking to reconcile these issues. IPFS +synthesizes learnings from many past successful systems. +Careful interface-focused integration yields a system greater +than the sum of its parts. The central IPFS principle is +modeling all data as part of the same Merkle DAG. +There have been many attempts at constructing a global +distributed file sy \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data new file mode 100644 index 00000000..a6e00f34 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data new file mode 100644 index 00000000..b6539897 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data new file mode 100644 index 00000000..6b72d373 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data @@ -0,0 +1,2 @@ + +u r[ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data new file mode 100644 index 00000000..9cda061b Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data new file mode 100644 index 00000000..7f2f4e92 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data new file mode 100644 index 00000000..2dd80560 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data @@ -0,0 +1,114 @@ + +  # 0.1 - Quick Start + +This is a set of short examples with minimal explanation. It is meant as +a "quick start". Soon, we'll write a longer tour :-) + + +Add a file to ipfs: + + echo "hello world" >hello + ipfs add hello + + +View it: + + ipfs cat + + +Try a directory: + + mkdir foo + mkdir foo/bar + echo "baz" > foo/baz + echo "baz" > foo/bar/baz + ipfs add -r foo + + +View things: + + ipfs ls + ipfs ls /bar + ipfs cat /baz + ipfs cat /bar/baz + ipfs cat /bar + ipfs ls /baz + + +References: + + ipfs refs + ipfs refs -r + ipfs refs --help + + +Get: + + ipfs get foo2 + diff foo foo2 + + +Objects: + + ipfs object get + ipfs object get /foo2 + ipfs object --help + + +Pin + GC: + + ipfs pin -r + ipfs gc + ipfs ls + ipfs unpin -r + ipfs gc + + +Daemon: + + ipfs daemon (in another terminal) + ipfs id + + +Network: + + (must be online) + ipfs swarm peers + ipfs id + ipfs cat + + +Mount: + + (warning: fuse is finicky!) + ipfs mount + cd /ipfs/< + + +Tool: + + ipfs version + ipfs update + ipfs commands + ipfs config --help + open http://localhost:5001/webui + + +Browse: + + webui: + + http://localhost:5001/webui + + video: + + http://localhost:8080/ipfs/QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu/play#/ipfs/QmTKZgRNwDNZwHtJSjCp6r5FYefzpULfy37JvMt9DwvXse + + images: + + http://localhost:8080/ipfs/QmZpc3HvfjEXvLWGQPWbHk3AjD5j8NEN4gmFN8Jmrd5g83/cs + + markdown renderer app: + + http://localhost:8080/ipfs/QmX7M9CiYXjVeFnkfVGf3y5ixTZ2ACeSGyL1vBJY1HvQPp/mdown + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data new file mode 100644 index 00000000..64ce0aeb Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data new file mode 100644 index 00000000..81663143 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data new file mode 100644 index 00000000..b75d8023 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data new file mode 100644 index 00000000..9553a942 --- /dev/null +++ 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data @@ -0,0 +1,2 @@ + + \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data new file mode 100644 index 00000000..e80dbd9a Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data new file mode 100644 index 00000000..d899663b Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data new file mode 100644 index 00000000..ba0caf40 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data new file mode 100644 index 00000000..1d48c015 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data new file mode 100644 index 00000000..b1df8c51 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data new file mode 100644 index 00000000..b0ac590e Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data new file mode 100644 index 00000000..3b40300d Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data new file mode 100644 index 
00000000..819ec6cf Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data new file mode 100644 index 00000000..c57d7186 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README new file mode 100644 index 00000000..23cb0909 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README @@ -0,0 +1,30 @@ +This is a repository of IPLD objects. Each IPLD object is in a single file, +named <base32 encoding of CID>.data, where <base32 encoding of CID> is the +"base32" encoding of the CID (as specified in +https://github.com/multiformats/multibase) without the 'B' prefix. +All the object files are placed in a tree of directories, based on a +function of the CID. This is a form of sharding similar to +the objects directory in git repositories. Previously we used +prefixes; we now use the next-to-last two characters. + + func NextToLast(base32cid string) string { + nextToLastLen := 2 + offset := len(base32cid) - nextToLastLen - 1 + return base32cid[offset : offset+nextToLastLen] + } + +For example, an object with a base58 CIDv1 of + + zb2rhYSxw4ZjuzgCnWSt19Q94ERaeFhu9uSqRgjSdx9bsgM6f + +has a base32 CIDv1 of + + BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA + +and will be placed at + + SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data + +with 'SC' being the next-to-last two characters and the 'B' at the +beginning of the CIDv1 string being the multibase prefix, which is not +stored in the filename.
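For illustration, the same next-to-last-two-characters sharding rule can be written as a small JavaScript sketch. The helper name blockPath is hypothetical and not part of this package; it only mirrors the _README above.

    // Hypothetical helper: map a base32 CIDv1 string to its path under
    // blocks/, following the sharding scheme described in _README above.
    function blockPath (base32Cid) {
      const nextToLastLen = 2
      const offset = base32Cid.length - nextToLastLen - 1
      const dir = base32Cid.slice(offset, offset + nextToLastLen)
      // the leading multibase prefix 'B' is not stored in the filename
      return `${dir}/${base32Cid.slice(1)}.data`
    }

    // blockPath('BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA')
    // => 'SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data'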
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/config b/packages/ipfs-unixfs-importer/test/test-repo/config new file mode 100644 index 00000000..cbcdfe3b --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/config @@ -0,0 +1 @@ +{"Identity":{"PeerID":"QmQ2zigjQikYnyYUSXZydNXrDRhBut2mubwJBaLXobMt3A","PrivKey":"CAASpgkwggSiAgEAAoIBAQC2SKo/HMFZeBml1AF3XijzrxrfQXdJzjePBZAbdxqKR1Mc6juRHXij6HXYPjlAk01BhF1S3Ll4Lwi0cAHhggf457sMg55UWyeGKeUv0ucgvCpBwlR5cQ020i0MgzjPWOLWq1rtvSbNcAi2ZEVn6+Q2EcHo3wUvWRtLeKz+DZSZfw2PEDC+DGPJPl7f8g7zl56YymmmzH9liZLNrzg/qidokUv5u1pdGrcpLuPNeTODk0cqKB+OUbuKj9GShYECCEjaybJDl9276oalL9ghBtSeEv20kugatTvYy590wFlJkkvyl+nPxIH0EEYMKK9XRWlu9XYnoSfboiwcv8M3SlsjAgMBAAECggEAZtju/bcKvKFPz0mkHiaJcpycy9STKphorpCT83srBVQi59CdFU6Mj+aL/xt0kCPMVigJw8P3/YCEJ9J+rS8BsoWE+xWUEsJvtXoT7vzPHaAtM3ci1HZd302Mz1+GgS8Epdx+7F5p80XAFLDUnELzOzKftvWGZmWfSeDnslwVONkL/1VAzwKy7Ce6hk4SxRE7l2NE2OklSHOzCGU1f78ZzVYKSnS5Ag9YrGjOAmTOXDbKNKN/qIorAQ1bovzGoCwx3iGIatQKFOxyVCyO1PsJYT7JO+kZbhBWRRE+L7l+ppPER9bdLFxs1t5CrKc078h+wuUr05S1P1JjXk68pk3+kQKBgQDeK8AR11373Mzib6uzpjGzgNRMzdYNuExWjxyxAzz53NAR7zrPHvXvfIqjDScLJ4NcRO2TddhXAfZoOPVH5k4PJHKLBPKuXZpWlookCAyENY7+Pd55S8r+a+MusrMagYNljb5WbVTgN8cgdpim9lbbIFlpN6SZaVjLQL3J8TWH6wKBgQDSChzItkqWX11CNstJ9zJyUE20I7LrpyBJNgG1gtvz3ZMUQCn3PxxHtQzN9n1P0mSSYs+jBKPuoSyYLt1wwe10/lpgL4rkKWU3/m1Myt0tveJ9WcqHh6tzcAbb/fXpUFT/o4SWDimWkPkuCb+8j//2yiXk0a/T2f36zKMuZvujqQKBgC6B7BAQDG2H2B/ijofp12ejJU36nL98gAZyqOfpLJ+FeMz4TlBDQ+phIMhnHXA5UkdDapQ+zA3SrFk+6yGk9Vw4Hf46B+82SvOrSbmnMa+PYqKYIvUzR4gg34rL/7AhwnbEyD5hXq4dHwMNsIDq+l2elPjwm/U9V0gdAl2+r50HAoGALtsKqMvhv8HucAMBPrLikhXP/8um8mMKFMrzfqZ+otxfHzlhI0L08Bo3jQrb0Z7ByNY6M8epOmbCKADsbWcVre/AAY0ZkuSZK/CaOXNX/AhMKmKJh8qAOPRY02LIJRBCpfS4czEdnfUhYV/TYiFNnKRj57PPYZdTzUsxa/yVTmECgYBr7slQEjb5Onn5mZnGDh+72BxLNdgwBkhO0OCdpdISqk0F0Pxby22DFOKXZEpiyI9XYP1C8wPiJsShGm2yEwBPWXnrrZNWczaVuCbXHrZkWQogBDG3HGXNdU4MAWCyiYlyinIBpPpoAJZSzpGLmWbMWh28+RJS6AQX6KHrK1o2uw=="},"Datastore":{"Type":"","Path":"","StorageMax":"","StorageGCWatermark":0,"GCPeriod":"","Params":null,"NoSync":false},"Addresses":{"Swarm":["/ip4/0.0.0.0/tcp/4001","/ip6/::/tcp/4001"],"API":"/ip4/127.0.0.1/tcp/5001","Gateway":"/ip4/127.0.0.1/tcp/8080"},"Mounts":{"IPFS":"/ipfs","IPNS":"/ipns","FuseAllowOther":false},"Version":{"Current":"0.4.0-dev","Check":"error","CheckDate":"0001-01-01T00:00:00Z","CheckPeriod":"172800000000000","AutoUpdate":"minor"},"Discovery":{"MDNS":{"Enabled":true,"Interval":10}},"Ipns":{"RepublishPeriod":"","RecordLifetime":"","ResolveCacheSize":128},"Bootstrap":["/ip4/104.131.131.82/tcp/4001/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ","/ip4/104.236.176.52/tcp/4001/ipfs/QmSoLnSGccFuZQJzRadHn95W2CrSFmZuTdDWP8HXaHca9z","/ip4/104.236.179.241/tcp/4001/ipfs/QmSoLPppuBtQSGwKDZT2M73ULpjvfd3aZ6ha4oFGL1KrGM","/ip4/162.243.248.213/tcp/4001/ipfs/QmSoLueR4xBeUbY9WZ9xGUUxunbKWcrNFTDAadQJmocnWm","/ip4/128.199.219.111/tcp/4001/ipfs/QmSoLSafTMBsPKadTEgaXctDQVcqN88CNLHXMkTNwMKPnu","/ip4/104.236.76.40/tcp/4001/ipfs/QmSoLV4Bbm51jM9C4gDYZQ9Cy3U6aXMJDAbzgu2fzaDs64","/ip4/178.62.158.247/tcp/4001/ipfs/QmSoLer265NRgSp2LA3dPaeykiS1J6DifTC88f5uVQKNAd","/ip4/178.62.61.185/tcp/4001/ipfs/QmSoLMeWqB7YGVLJN3pNLQpmmEk35v6wYtsMGLzSr5QBU3","/ip4/104.236.151.122/tcp/4001/ipfs/QmSoLju6m7xTh3DuokvT3886QRYqxAzb1kShaanJgW36yx"],"Tour":{"Last":""},"Gateway":{"HTTPHeaders":null,"RootRedirect":"","Writable":false},"SupernodeRouting":{"Servers":["/ip4/104.236.176.52/tcp/4002/ipfs/QmXdb7tWTxdFEQEFgWBqkuYSrZd3mXrC7HxkD4krGNYx2U","/ip4/104.236.179.241/tcp/4002/ipfs/QmVRqViDByUxjUMoPnjurjKvZhaEMFDtK35FJXHAM4Lkj6
","/ip4/104.236.151.122/tcp/4002/ipfs/QmSZwGx8Tn8tmcM4PtDJaMeUQNRhNFdBLVGPzRiNaRJtFH","/ip4/162.243.248.213/tcp/4002/ipfs/QmbHVEEepCi7rn7VL7Exxpd2Ci9NNB6ifvqwhsrbRMgQFP","/ip4/128.199.219.111/tcp/4002/ipfs/Qmb3brdCYmKG1ycwqCbo6LUwWxTuo3FisnJV2yir7oN92R","/ip4/104.236.76.40/tcp/4002/ipfs/QmdRBCV8Cz2dGhoKLkD3YjPwVFECmqADQkx5ZteF2c6Fy4","/ip4/178.62.158.247/tcp/4002/ipfs/QmUdiMPci7YoEUBkyFZAh2pAbjqcPr7LezyiPD2artLw3v","/ip4/178.62.61.185/tcp/4002/ipfs/QmVw6fGNqBixZE4bewRLT2VXX7fAHUHs8JyidDiJ1P7RUN"]},"API":{"HTTPHeaders":null},"Swarm":{"AddrFilters":null},"Log":{"MaxSizeMB":250,"MaxBackups":1,"MaxAgeDays":0}} \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb new file mode 100644 index 00000000..fc04d660 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb new file mode 100644 index 00000000..63d9d260 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT new file mode 100644 index 00000000..5b540107 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT @@ -0,0 +1 @@ +MANIFEST-000011 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK new file mode 100644 index 00000000..e69de29b diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG new file mode 100644 index 00000000..fb2ef830 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG @@ -0,0 +1,5 @@ +=============== Aug 19, 2016 (CEST) =============== +15:48:10.633634 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +15:48:10.634191 db@open opening +15:48:10.639318 db@janitor F·4 G·0 +15:48:10.639379 db@open done T·5.16729ms diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old new file mode 100644 index 00000000..f5ffd612 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old @@ -0,0 +1,7 @@ +=============== Apr 22, 2016 (WEST) =============== +03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed +03:16:42.272857 db@open opening +03:16:42.275673 db@janitor F·4 G·0 +03:16:42.275700 db@open done T·2.831108ms +03:16:42.596938 db@close closing +03:16:42.597082 db@close done T·139.194µs diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 new file mode 100644 index 00000000..7af87ca8 Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/version b/packages/ipfs-unixfs-importer/test/test-repo/version new file mode 100644 index 00000000..1e8b3149 --- /dev/null +++ b/packages/ipfs-unixfs-importer/test/test-repo/version @@ -0,0 +1 @@ +6