diff --git a/packages/ipfs-unixfs-importer/.aegir.js b/packages/ipfs-unixfs-importer/.aegir.js
new file mode 100644
index 00000000..69129724
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/.aegir.js
@@ -0,0 +1,7 @@
+'use strict'
+
+module.exports = {
+ karma: {
+ browserNoActivityTimeout: 500 * 1000
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/.gitignore b/packages/ipfs-unixfs-importer/.gitignore
new file mode 100644
index 00000000..41396f3f
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/.gitignore
@@ -0,0 +1,45 @@
+docs
+yarn.lock
+**/node_modules/
+**/*.log
+test/repo-tests*
+**/bundle.js
+
+# Logs
+logs
+*.log
+
+coverage
+
+# Runtime data
+pids
+*.pid
+*.seed
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+.nyc_output
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# node-waf configuration
+.lock-wscript
+
+build
+
+# Dependency directory
+# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
+node_modules
+
+lib
+dist
+test/test-data/go-ipfs-repo/LOCK
+test/test-data/go-ipfs-repo/LOG
+test/test-data/go-ipfs-repo/LOG.old
+
+# while testing npm5
+package-lock.json
diff --git a/packages/ipfs-unixfs-importer/.npmignore b/packages/ipfs-unixfs-importer/.npmignore
new file mode 100644
index 00000000..70ea7a67
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/.npmignore
@@ -0,0 +1,33 @@
+
+.DS_Store
+tests/repo-tests*
+
+# Logs
+logs
+*.log
+
+# Runtime data
+pids
+*.pid
+*.seed
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (http://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directory
+# https://www.npmjs.org/doc/misc/npm-faq.html#should-i-check-my-node_modules-folder-into-git
+node_modules
+
+test
diff --git a/packages/ipfs-unixfs-importer/.travis.yml b/packages/ipfs-unixfs-importer/.travis.yml
new file mode 100644
index 00000000..be3ad283
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/.travis.yml
@@ -0,0 +1,40 @@
+language: node_js
+cache: npm
+stages:
+ - check
+ - test
+ - cov
+
+node_js:
+ - '12'
+
+os:
+ - linux
+ - osx
+ - windows
+
+script: npx nyc -s npm run test:node -- --bail
+after_success: npx nyc report --reporter=text-lcov > coverage.lcov && npx codecov
+
+jobs:
+ include:
+ - stage: check
+ script:
+ - npx aegir commitlint --travis
+ - npx aegir dep-check
+ - npm run lint
+
+ - stage: test
+ name: chrome
+ addons:
+ chrome: stable
+ script: npx aegir test -t browser -t webworker
+
+ - stage: test
+ name: firefox
+ addons:
+ firefox: latest
+ script: npx aegir test -t browser -t webworker -- --browsers FirefoxHeadless
+
+notifications:
+ email: false
diff --git a/packages/ipfs-unixfs-importer/CHANGELOG.md b/packages/ipfs-unixfs-importer/CHANGELOG.md
new file mode 100644
index 00000000..bf0082ce
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/CHANGELOG.md
@@ -0,0 +1,928 @@
+
+# [0.45.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.44.1...v0.45.0) (2020-02-04)
+
+
+### Bug Fixes
+
+* only output unixfs things ([#49](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/49)) ([8ecdcf2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/8ecdcf2))
+
+
+### BREAKING CHANGES
+
+* If your data is below the chunk size, and you have `rawLeaves` and
+`reduceSingleLeafToSelf` set to true, you'll get a CID that resolves
+to a bona fide UnixFS file back with metadata and all that good
+stuff instead of a `dag-raw` node.
+
+
+
+
+## [0.44.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.44.0...v0.44.1) (2020-02-03)
+
+
+### Performance Improvements
+
+* small bl ([#52](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/52)) ([3d461ce](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/3d461ce))
+
+
+
+
+# [0.44.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.43.1...v0.44.0) (2020-01-15)
+
+
+### Features
+
+* allow overriding of internal functions ([#48](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/48)) ([0bff5f2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/0bff5f2))
+
+
+
+
+## [0.43.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.43.0...v0.43.1) (2020-01-09)
+
+
+### Bug Fixes
+
+* specify default codec ([4b79619](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4b79619))
+
+
+
+
+# [0.43.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.42.0...v0.43.0) (2020-01-08)
+
+
+
+# [0.42.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.41.0...v0.42.0) (2019-11-27)
+
+
+### Performance Improvements
+
+* avoid unnecessary buffer copy ([#40](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/40)) ([b5e5b5a](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/b5e5b5a15f8460c0effbedfd6aa39a1e594733df))
+* concurrent file import ([#41](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/41)) ([68ac8cc](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/68ac8cc233dbe73fcb8244911e09ed59789cddc9)), closes [#38](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/38)
+
+
+
+
+# [0.41.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.40.0...v0.41.0) (2019-11-22)
+
+
+### Features
+
+* support storing metadata in unixfs nodes ([#39](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/39)) ([a47c9ed](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/a47c9ed))
+
+
+
+# [0.40.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.11...v0.40.0) (2019-08-05)
+
+
+### Bug Fixes
+
+* update to newest IPLD libraries ([#37](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/37)) ([f79355f](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/f79355f))
+
+
+
+## [0.39.11](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.10...v0.39.11) (2019-06-06)
+
+
+### Bug Fixes
+
+* validate rabin args ([#32](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/32)) ([55c5dba](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/55c5dba))
+
+
+
+
+## [0.39.10](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.9...v0.39.10) (2019-06-04)
+
+
+### Bug Fixes
+
+* remove unused dep ([efa2ca2](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/efa2ca2))
+
+
+### Features
+
+* use a rabin chunker in wasm ([#31](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/31)) ([d4021db](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/d4021db))
+
+
+
+
+## [0.39.9](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.8...v0.39.9) (2019-05-24)
+
+
+### Features
+
+* adds js implementation of rabin chunker for windows and browser ([#30](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/30)) ([542b3e4](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/542b3e4))
+
+
+
+
+## [0.39.8](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.7...v0.39.8) (2019-05-24)
+
+
+### Bug Fixes
+
+* make trickle dag importer compatible with go ([#29](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/29)) ([01c7323](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/01c7323))
+
+
+
+
+## [0.39.7](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.6...v0.39.7) (2019-05-23)
+
+
+### Bug Fixes
+
+* remove leftpad ([#28](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/28)) ([0aeb0f6](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/0aeb0f6))
+
+
+
+
+## [0.39.6](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.5...v0.39.6) (2019-05-20)
+
+
+### Bug Fixes
+
+* final trickle dag tests ([#27](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/27)) ([72b8bc7](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/72b8bc7))
+
+
+
+
+## [0.39.5](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.4...v0.39.5) (2019-05-20)
+
+
+
+
+## [0.39.4](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.3...v0.39.4) (2019-05-20)
+
+
+### Bug Fixes
+
+* add missing dependency async-iterator-all ([#26](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/26)) ([83d4075](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/83d4075))
+
+
+
+
+## [0.39.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.2...v0.39.3) (2019-05-18)
+
+
+
+
+## [0.39.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.1...v0.39.2) (2019-05-17)
+
+
+### Bug Fixes
+
+* move async-iterator-first out of dev deps ([7b76f4b](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/7b76f4b))
+
+
+
+
+## [0.39.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.39.0...v0.39.1) (2019-05-17)
+
+
+
+
+# [0.39.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.5...v0.39.0) (2019-05-17)
+
+
+### Features
+
+* switch to async await ([#24](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/24)) ([2a40ecb](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/2a40ecb))
+
+
+
+
+## [0.38.5](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.4...v0.38.5) (2019-03-18)
+
+
+
+
+## [0.38.4](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.3...v0.38.4) (2019-01-18)
+
+
+
+
+## [0.38.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.2...v0.38.3) (2019-01-16)
+
+
+### Bug Fixes
+
+* increase test timeouts for sharding ([#18](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/18)) ([bc35f6f](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/bc35f6f))
+
+
+
+
+## [0.38.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.1...v0.38.2) (2019-01-14)
+
+
+
+
+## [0.38.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.38.0...v0.38.1) (2019-01-14)
+
+
+### Bug Fixes
+
+* turn non-function progress callback into a noop ([#16](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/16)) ([6d2c15d](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/6d2c15d))
+
+
+
+
+# [0.38.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.3...v0.38.0) (2019-01-04)
+
+
+### Bug Fixes
+
+* pull-stream/throughs/through is not pull-through ([df0abfa](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/df0abfa))
+
+
+### Performance Improvements
+
+* do not create new buffers ([4ef5dbc](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4ef5dbc))
+* switch out pull-block for bl ([#12](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/12)) ([4e5b618](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/4e5b618))
+* write files in parallel chunks, use a through instead of a map ([6a86d55](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/6a86d55))
+
+
+
+
+## [0.37.3](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.2...v0.37.3) (2018-12-19)
+
+
+### Bug Fixes
+
+* increase sharding timeouts ([69210b6](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/69210b6))
+
+
+
+
+## [0.37.2](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.1...v0.37.2) (2018-12-04)
+
+
+### Bug Fixes
+
+* fix regex to match files with square brackets ([986f945](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/986f945))
+
+
+
+
+## [0.37.1](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.37.0...v0.37.1) (2018-12-03)
+
+
+### Performance Improvements
+
+* deep require pull stream modules ([092b5b4](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/092b5b4))
+
+
+
+
+# [0.37.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.36.0...v0.37.0) (2018-11-26)
+
+
+### Features
+
+* export hash function from sharding ([7e24107](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/7e24107))
+
+
+
+
+# [0.36.0](https://github.com/ipfs/js-ipfs-unixfs-importer/compare/v0.34.0...v0.36.0) (2018-11-23)
+
+
+### Bug Fixes
+
+* support slashes in filenames ([3171fab](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/3171fab))
+
+
+### Features
+
+* split hamt out into separate module, closes [#1](https://github.com/ipfs/js-ipfs-unixfs-importer/issues/1) ([bf216a9](https://github.com/ipfs/js-ipfs-unixfs-importer/commit/bf216a9))
+
+
+
+
+# [0.34.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.33.0...v0.34.0) (2018-11-12)
+
+
+### Bug Fixes
+
+* updates ipld-dag-pb dep to version without .cid properties ([aa61cce](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/aa61cce))
+
+
+
+
+# [0.33.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.8...v0.33.0) (2018-10-27)
+
+
+### Bug Fixes
+
+* fixes [#230](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/230) by returning a through stream that emits the error instead of throwing it ([fdd8429](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/fdd8429))
+
+
+
+
+## [0.32.8](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.7...v0.32.8) (2018-10-25)
+
+
+
+
+## [0.32.7](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.6...v0.32.7) (2018-10-12)
+
+
+### Bug Fixes
+
+* return correct chunks of streams, fixes [#229](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/229) ([362c685](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/362c685))
+* skip rabin tests on windows ([ea9e3c3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ea9e3c3))
+
+
+
+
+## [0.32.6](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.5...v0.32.6) (2018-10-12)
+
+
+### Bug Fixes
+
+* do not use cid property of DAGNodes just yet ([7a2a308](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7a2a308))
+
+
+
+
+## [0.32.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.4...v0.32.5) (2018-10-12)
+
+
+### Bug Fixes
+
+* do not overwrite cid property of DAGNodes ([c2e38ae](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/c2e38ae))
+* make sure errors from unmarshalling are caught ([8b2335c](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8b2335c))
+
+
+
+
+## [0.32.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.3...v0.32.4) (2018-08-23)
+
+
+### Bug Fixes
+
+* build & export interop with go-ipfs for small file raw leaves ([11885fa](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/11885fa))
+
+
+
+
+## [0.32.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.2...v0.32.3) (2018-08-21)
+
+
+### Bug Fixes
+
+* import with CID version 1 ([6ef929d](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/6ef929d))
+* typo ([c5cb38b](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/c5cb38b))
+
+
+
+
+## [0.32.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.1...v0.32.2) (2018-08-11)
+
+
+### Bug Fixes
+
+* make rabin an optional dependency ([bef3152](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/bef3152))
+* skip first hash algorithm as it is no longer valid ([0b84b76](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0b84b76)), closes [js-multihash#57](https://github.com/js-multihash/issues/57)
+
+
+
+
+## [0.32.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.32.0...v0.32.1) (2018-08-08)
+
+
+### Bug Fixes
+
+* do not emit empty buffers for non-empty files ([ccc4ad2](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ccc4ad2))
+
+
+
+
+# [0.32.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.3...v0.32.0) (2018-08-08)
+
+
+### Features
+
+* **importer:** add rabin fingerprinting chunk algorithm ([83a5feb](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/83a5feb)), closes [ipfs/js-ipfs#1283](https://github.com/ipfs/js-ipfs/issues/1283)
+
+
+
+
+## [0.31.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.2...v0.31.3) (2018-07-24)
+
+
+### Bug Fixes
+
+* return cids from builder ([0d3d3d8](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0d3d3d8))
+
+
+
+
+## [0.31.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.1...v0.31.2) (2018-07-20)
+
+
+
+
+## [0.31.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.31.0...v0.31.1) (2018-07-19)
+
+
+
+
+# [0.31.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.30.1...v0.31.0) (2018-07-19)
+
+
+
+
+## [0.30.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.30.0...v0.30.1) (2018-07-19)
+
+
+### Features
+
+* support --raw-leaves ([7a29d83](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7a29d83)), closes [ipfs/js-ipfs#1432](https://github.com/ipfs/js-ipfs/issues/1432)
+
+
+
+
+# [0.30.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.29.0...v0.30.0) (2018-06-12)
+
+
+
+
+# [0.29.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.28.1...v0.29.0) (2018-04-23)
+
+
+
+
+## [0.28.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.28.0...v0.28.1) (2018-04-12)
+
+
+
+
+# [0.28.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.27.0...v0.28.0) (2018-04-10)
+
+
+
+
+# [0.27.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.26.0...v0.27.0) (2018-03-27)
+
+
+### Features
+
+* exporter - support slicing streams stored in deeply nested DAGs ([#208](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/208)) ([8568cd5](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8568cd5))
+
+
+
+
+# [0.26.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.25.0...v0.26.0) (2018-03-22)
+
+
+### Features
+
+* Adds begin/end byte slices to exporter ([#207](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/207)) ([8e11d77](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8e11d77))
+
+
+
+
+# [0.25.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.4...v0.25.0) (2018-03-20)
+
+
+### Features
+
+* Add reader to read files or part of files as streams ([833accf](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/833accf))
+
+
+
+
+## [0.24.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.3...v0.24.4) (2018-02-27)
+
+
+### Bug Fixes
+
+* use "ipld" instead of "ipld-resolver" ([f4de206](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/f4de206))
+
+
+
+
+## [0.24.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.2...v0.24.3) (2018-02-27)
+
+
+
+
+## [0.24.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.1...v0.24.2) (2017-12-15)
+
+
+
+
+## [0.24.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.24.0...v0.24.1) (2017-11-12)
+
+
+
+
+# [0.24.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.23.1...v0.24.0) (2017-11-12)
+
+
+### Features
+
+* exporter maxDepth ([#197](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/197)) ([211e4e3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/211e4e3))
+
+
+
+
+## [0.23.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.23.0...v0.23.1) (2017-11-10)
+
+
+### Features
+
+* windows interop ([#195](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/195)) ([aa21ff3](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/aa21ff3))
+
+
+
+
+# [0.23.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.5...v0.23.0) (2017-11-07)
+
+
+### Features
+
+* Include hash field for exported files ([#191](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/191)) ([8b13957](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/8b13957))
+
+
+
+
+## [0.22.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.4...v0.22.5) (2017-09-08)
+
+
+### Features
+
+* Use passed cidVersion option when writing to storage ([#185](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/185)) ([0cd2d60](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0cd2d60))
+
+
+
+
+## [0.22.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.3...v0.22.4) (2017-09-08)
+
+
+### Features
+
+* allow specify hash algorithm for large files ([#184](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/184)) ([69915da](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/69915da))
+
+
+
+
+## [0.22.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.2...v0.22.3) (2017-09-07)
+
+
+
+
+## [0.22.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.1...v0.22.2) (2017-09-07)
+
+
+### Features
+
+* Add `onlyHash` option ([#183](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/183)) ([7450a65](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7450a65))
+* adds call to progress bar function ([#179](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/179)) ([ac6f722](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/ac6f722))
+
+
+
+
+## [0.22.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.22.0...v0.22.1) (2017-09-04)
+
+
+
+
+# [0.22.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.21.0...v0.22.0) (2017-07-23)
+
+
+
+
+# [0.21.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.20.0...v0.21.0) (2017-07-04)
+
+
+
+
+# [0.20.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.2...v0.20.0) (2017-06-16)
+
+
+### Features
+
+* subtree support ([#175](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/175)) ([16b788c](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/16b788c))
+
+
+
+
+## [0.19.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.1...v0.19.2) (2017-05-25)
+
+
+### Bug Fixes
+
+* **package:** update cids to version 0.5.0 ([59d6d0a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/59d6d0a))
+
+
+### Features
+
+* dag-api direct support ([adaeb37](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/adaeb37))
+
+
+
+
+## [0.19.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.19.0...v0.19.1) (2017-03-29)
+
+
+### Bug Fixes
+
+* adding a dir: leaf node gets replaced with dir if necessary ([1d682ec](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/1d682ec))
+
+
+
+
+# [0.19.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.18.0...v0.19.0) (2017-03-24)
+
+
+### Bug Fixes
+
+* breaking the stack when importing ([993f746](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/993f746))
+* passing browser tests ([29b2740](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/29b2740))
+* using correct murmur3 codec name ([295d86e](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/295d86e))
+* using the new IPLD API ([a80f4d8](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/a80f4d8))
+
+
+
+
+# [0.18.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.17.0...v0.18.0) (2017-03-22)
+
+
+### Bug Fixes
+
+* **package:** update ipld-dag-pb to version 0.10.0 ([#154](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/154)) ([304ff25](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/304ff25))
+* **package:** update pull-pause to version 0.0.1 ([#153](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/153)) ([4dd2143](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/4dd2143))
+
+
+### Features
+
+* upgrade to the next version of ipfs-block and blockservice ([0ca25b2](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0ca25b2))
+
+
+
+
+# [0.17.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.16.1...v0.17.0) (2017-02-08)
+
+
+### Features
+
+* update to latest ipld-resolver ([#137](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/137)) ([211dfb6](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/211dfb6))
+
+
+
+
+## [0.16.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.16.0...v0.16.1) (2017-02-02)
+
+
+### Bug Fixes
+
+* exporter: recurse correctly into subdirs ([#136](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/136)) ([69c0d04](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/69c0d04))
+
+
+
+
+# [0.16.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.4...v0.16.0) (2017-02-02)
+
+
+### Bug Fixes
+
+* **package:** update is-ipfs to version 0.3.0 ([#134](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/134)) ([0063f9d](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/0063f9d))
+
+
+
+
+## [0.15.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.3...v0.15.4) (2017-01-31)
+
+
+### Bug Fixes
+
+* case for empty file ([#132](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/132)) ([fee55d1](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/fee55d1))
+
+
+
+
+## [0.15.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.2...v0.15.3) (2017-01-30)
+
+
+### Bug Fixes
+
+* expect empty stream to not generate any nodes ([#131](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/131)) ([7b054b6](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/7b054b6))
+
+
+
+
+## [0.15.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.1...v0.15.2) (2017-01-30)
+
+
+### Bug Fixes
+
+* stop export visitor from trying to resolve leaf object ([#130](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/130)) ([651f113](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/651f113))
+
+
+
+
+## [0.15.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.15.0...v0.15.1) (2017-01-29)
+
+
+### Bug Fixes
+
+* **package:** update cids to version 0.4.0 ([#122](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/122)) ([65a6759](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/65a6759))
+
+
+
+
+# [0.15.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.2...v0.15.0) (2017-01-11)
+
+
+
+
+## [0.14.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.1...v0.14.2) (2016-12-13)
+
+
+
+
+## [0.14.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.14.0...v0.14.1) (2016-12-08)
+
+
+
+
+# [0.14.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.13.0...v0.14.0) (2016-11-24)
+
+
+### Features
+
+* upgrade to latest dag-pb API ([#88](https://github.com/ipfs/js-ipfs-unixfs-engine/issues/88)) ([51d1245](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/51d1245))
+
+
+
+
+# [0.13.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.12.0...v0.13.0) (2016-11-03)
+
+
+
+
+# [0.12.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.4...v0.12.0) (2016-10-28)
+
+
+### Bug Fixes
+
+* **exporter:** add some parallel fetching of blocks where possible ([43503d4](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/43503d4))
+
+
+### Features
+
+* migrate importer to use IPLD Resolver and the new IPLD format ([89c3602](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/89c3602))
+
+
+
+
+## [0.11.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.3...v0.11.4) (2016-09-11)
+
+
+### Features
+
+* **exporter:** implement recursive file export ([68e09a7](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/68e09a7))
+
+
+
+
+## [0.11.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.2...v0.11.3) (2016-09-09)
+
+
+### Features
+
+* **exporter:** return file sizes ([73cf78a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/73cf78a))
+
+
+
+
+## [0.11.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.1...v0.11.2) (2016-09-09)
+
+
+
+
+## [0.11.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.11.0...v0.11.1) (2016-09-09)
+
+
+
+
+# [0.11.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.2...v0.11.0) (2016-09-08)
+
+
+### Bug Fixes
+
+* **tests:** ignore ordering ([f8d1b2a](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/f8d1b2a))
+
+
+
+
+## [0.10.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.1...v0.10.2) (2016-08-09)
+
+
+
+
+## [0.10.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.10.0...v0.10.1) (2016-08-09)
+
+
+
+
+# [0.10.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.9.0...v0.10.0) (2016-06-28)
+
+
+
+
+# [0.9.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.8.0...v0.9.0) (2016-05-27)
+
+
+
+
+# [0.8.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.7.0...v0.8.0) (2016-05-21)
+
+
+
+
+# [0.7.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.6.1...v0.7.0) (2016-05-21)
+
+
+
+
+## [0.6.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.6.0...v0.6.1) (2016-05-05)
+
+
+
+
+# [0.6.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.5.0...v0.6.0) (2016-05-03)
+
+
+
+
+# [0.5.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.5...v0.5.0) (2016-04-26)
+
+
+
+
+## [0.4.5](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.4...v0.4.5) (2016-04-24)
+
+
+
+
+## [0.4.4](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.3...v0.4.4) (2016-04-24)
+
+
+
+
+## [0.4.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.2...v0.4.3) (2016-04-24)
+
+
+### Bug Fixes
+
+* clean up dependencies ([a3bee40](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/a3bee40))
+* **importer:** cleanup smaller issues ([eab17fe](https://github.com/ipfs/js-ipfs-unixfs-engine/commit/eab17fe))
+
+
+
+
+## [0.4.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.1...v0.4.2) (2016-04-19)
+
+
+
+
+## [0.4.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.4.0...v0.4.1) (2016-04-19)
+
+
+
+
+# [0.4.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.3...v0.4.0) (2016-04-19)
+
+
+
+
+## [0.3.3](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.2...v0.3.3) (2016-03-22)
+
+
+
+
+## [0.3.2](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.1...v0.3.2) (2016-03-22)
+
+
+
+
+## [0.3.1](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.3.0...v0.3.1) (2016-03-22)
+
+
+
+
+# [0.3.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.2.0...v0.3.0) (2016-03-21)
+
+
+
+
+# [0.2.0](https://github.com/ipfs/js-ipfs-unixfs-engine/compare/v0.1.0...v0.2.0) (2016-02-17)
+
+
+
+
+# 0.1.0 (2016-02-12)
+
+
+
diff --git a/packages/ipfs-unixfs-importer/LICENSE b/packages/ipfs-unixfs-importer/LICENSE
new file mode 100644
index 00000000..b7cf9f52
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/LICENSE
@@ -0,0 +1,22 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 David Dias
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
diff --git a/packages/ipfs-unixfs-importer/README.md b/packages/ipfs-unixfs-importer/README.md
new file mode 100644
index 00000000..da32517c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/README.md
@@ -0,0 +1,186 @@
+# ipfs-unixfs-importer
+
+[![](https://img.shields.io/badge/made%20by-Protocol%20Labs-blue.svg?style=flat-square)](http://ipn.io)
+[![](https://img.shields.io/badge/project-IPFS-blue.svg?style=flat-square)](http://ipfs.io/)
+[![](https://img.shields.io/badge/freenode-%23ipfs-blue.svg?style=flat-square)](http://webchat.freenode.net/?channels=%23ipfs)
+[![standard-readme compliant](https://img.shields.io/badge/standard--readme-OK-green.svg?style=flat-square)](https://github.com/RichardLitt/standard-readme)
+[![Build Status](https://flat.badgen.net/travis/ipfs/js-ipfs-unixfs-importer)](https://travis-ci.com/ipfs/js-ipfs-unixfs-importer)
+[![Codecov](https://codecov.io/gh/ipfs/js-ipfs-unixfs-importer/branch/master/graph/badge.svg)](https://codecov.io/gh/ipfs/js-ipfs-unixfs-importer)
+[![Dependency Status](https://david-dm.org/ipfs/js-ipfs-unixfs-importer.svg?style=flat-square)](https://david-dm.org/ipfs/js-ipfs-unixfs-importer)
+[![js-standard-style](https://img.shields.io/badge/code%20style-standard-brightgreen.svg?style=flat-square)](https://github.com/feross/standard)
+![](https://img.shields.io/badge/npm-%3E%3D3.0.0-orange.svg?style=flat-square)
+![](https://img.shields.io/badge/Node.js-%3E%3D8.0.0-orange.svg?style=flat-square)
+
+> JavaScript implementation of the layout and chunking mechanisms used by IPFS to handle Files
+
+## Lead Maintainer
+
+[Alex Potsides](https://github.com/achingbrain)
+
+## Table of Contents
+
+- [Install](#install)
+- [Usage](#usage)
+ - [Example](#example)
+ - [API](#api)
+ - [const import = importer(source, ipld [, options])](#const-import--importersource-ipld--options)
+- [Overriding internals](#overriding-internals)
+- [Contribute](#contribute)
+- [License](#license)
+
+## Install
+
+```sh
+> npm install ipfs-unixfs-importer
+```
+
+## Usage
+
+### Example
+
+Let's create a little directory to import:
+
+```sh
+> cd /tmp
+> mkdir foo
+> echo 'hello' > foo/bar
+> echo 'world' > foo/quux
+```
+
+And write the importing logic:
+
+```js
+const importer = require('ipfs-unixfs-importer')
+const fs = require('fs')
+
+// Import the files under /tmp/foo
+const source = [{
+ path: '/tmp/foo/bar',
+ content: fs.createReadStream('/tmp/foo/bar')
+}, {
+ path: '/tmp/foo/quux',
+ content: fs.createReadStream('/tmp/foo/quux')
+}]
+
+// You need to create and pass an ipld-resolver instance
+// https://github.com/ipld/js-ipld-resolver
+for await (const entry of importer(source, ipld, options)) {
+ console.info(entry)
+}
+```
+
+When run, metadata about each DAGNode in the created tree is printed, ending with the root:
+
+```js
+{
+ cid: CID, // see https://github.com/multiformats/js-cid
+ path: 'tmp/foo/bar',
+ unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+ cid: CID, // see https://github.com/multiformats/js-cid
+ path: 'tmp/foo/quux',
+ unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+ cid: CID, // see https://github.com/multiformats/js-cid
+ path: 'tmp/foo',
+ unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+{
+ cid: CID, // see https://github.com/multiformats/js-cid
+ path: 'tmp',
+ unixfs: UnixFS // see https://github.com/ipfs/js-ipfs-unixfs
+}
+```
+
+#### API
+
+```js
+const importer = require('ipfs-unixfs-importer')
+```
+
+#### const import = importer(source, ipld [, options])
+
+The `import` function returns an async iterator. It takes a source async iterator that yields objects of the form:
+
+```js
+{
+ path: 'a name',
+ content: (Buffer or iterator emitting Buffers),
+ mtime: (Number representing seconds since (positive) or before (negative) the Unix Epoch),
+ mode: (Number representing ugo-rwx, setuid, setguid and sticky bit)
+}
+```
+
+`import` will output file info objects as files get stored in IPFS. When stats on a node are emitted, that node is guaranteed to have been written.
+
+`ipld` is an instance of the [`IPLD Resolver`](https://github.com/ipld/js-ipld-resolver) or the [`js-ipfs` `dag api`](https://github.com/ipfs/interface-ipfs-core/blob/master/SPEC/DAG.md)
+
+The input's file paths and directory structure will be preserved in the [`dag-pb`](https://github.com/ipld/js-ipld-dag-pb) created nodes.
+
+`options` is a JavaScript object that may include the following keys (a short usage sketch follows the list):
+
+- `wrap` (boolean, defaults to false): if true, a wrapping node will be created
+- `shardSplitThreshold` (positive integer, defaults to 1000): the number of directory entries above which we decide to use a sharding directory builder (instead of the default flat one)
+- `chunker` (string, defaults to `"fixed"`): the chunking strategy. Supports:
+ - `fixed`
+ - `rabin`
+- `avgChunkSize` (positive integer, defaults to `262144`): the average chunk size (rabin chunker only)
+- `minChunkSize` (positive integer): the minimum chunk size (rabin chunker only)
+- `maxChunkSize` (positive integer, defaults to `262144`): the maximum chunk size
+- `strategy` (string, defaults to `"balanced"`): the DAG builder strategy name. Supports:
+ - `flat`: flat list of chunks
+ - `balanced`: builds a balanced tree
+ - `trickle`: builds [a trickle tree](https://github.com/ipfs/specs/pull/57#issuecomment-265205384)
+- `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
+- `layerRepeat` (positive integer, defaults to 4): (only applicable to the `trickle` DAG builder strategy) the maximum repetition of parent nodes for each layer of the tree
+- `reduceSingleLeafToSelf` (boolean, defaults to `true`): when a file fits into a single leaf node, use that node as the file root instead of wrapping it in a parent node
+- `hamtHashFn` (async function(string) Buffer): a function that hashes file names to create HAMT shards
+- `hamtBucketBits` (positive integer, defaults to `8`): the number of bits at each bucket of the HAMT
+- `progress` (function): a function that will be called with the byte length of chunks as a file is added to ipfs.
+- `onlyHash` (boolean, defaults to false): Only chunk and hash - do not write to disk
+- `hashAlg` (string): multihash hashing algorithm to use
+- `cidVersion` (integer, defaults to `0`): the CID version to use when storing the data (storage keys are based on the CID, _including_ its version)
+- `rawLeaves` (boolean, defaults to false): When a file would span multiple DAGNodes, if this is true the leaf nodes will not be wrapped in `UnixFS` protobufs and will instead contain the raw file bytes
+- `leafType` (string, defaults to `'file'`): what type of UnixFS node leaves should be - can be `'file'` or `'raw'` (ignored when `rawLeaves` is `true`)
+- `blockWriteConcurrency` (positive integer, defaults to `10`): how many blocks to hash and write to the block store concurrently. For small numbers of large files this should be high (e.g. 50).
+- `fileImportConcurrency` (number, defaults to `50`): how many files to import concurrently. For large numbers of small files this should be high (e.g. 50).
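+
+For reference, here is a minimal usage sketch passing a few of these options - the values shown are illustrative rather than recommended defaults, and `source` and `ipld` are the same as in the example above:
+
+```js
+const entries = importer(source, ipld, {
+  chunker: 'fixed',
+  maxChunkSize: 262144,
+  strategy: 'balanced',
+  rawLeaves: true,
+  cidVersion: 1,
+  progress: (bytes) => console.log(`imported ${bytes} bytes`)
+})
+
+for await (const entry of entries) {
+  console.info(entry.path, entry.cid.toString())
+}
+```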
+
+## Overriding internals
+
+Several aspects of the importer are overridable by specifying functions as part of the options object with these keys (a sketch of a custom chunker follows the list):
+
+- `chunkValidator` (function): Optional function that supports the signature `async function * (source, options)`
+ - This function takes input from the `content` field of imported entries. It should transform them into `Buffer`s, throwing an error if it cannot.
+ - It should yield `Buffer` objects constructed from the `source` or throw an `Error`
+- `chunker` (function): Optional function that supports the signature `async function * (source, options)` where `source` is an async generator and `options` is an options object
+ - It should yield `Buffer` objects.
+- `bufferImporter` (function): Optional function that supports the signature `async function * (entry, source, ipld, options)`
+ - This function should read `Buffer`s from `source` and persist them using `ipld.put` or similar
+ - `entry` is the `{ path, content }` entry, `source` is an async generator that yields Buffers
+ - It should yield functions that return a Promise that resolves to an object with the properties `{ cid, unixfs, size }` where `cid` is a [CID], `unixfs` is a [UnixFS] entry and `size` is a `Number` that represents the serialized size of the [IPLD] node that holds the buffer data.
+ - Values will be pulled from this generator in parallel - the amount of parallelisation is controlled by the `blockWriteConcurrency` option (default: 10)
+- `dagBuilder` (function): Optional function that supports the signature `async function * (source, ipld, options)`
+ - This function should read `{ path, content }` entries from `source` and turn them into DAGs
+ - It should yield a `function` that returns a `Promise` that resolves to `{ cid, path, unixfs, node }` where `cid` is a `CID`, `path` is a string, `unixfs` is a UnixFS entry and `node` is a `DAGNode`.
+ - Values will be pulled from this generator in parallel - the amount of parallelisation is controlled by the `fileImportConcurrency` option (default: 50)
+- `treeBuilder` (function): Optional function that supports the signature `async function * (source, ipld, options)`
+ - This function should read `{ cid, path, unixfs, node }` entries from `source` and place them in a directory structure
+ - It should yield an object with the properties `{ cid, path, unixfs, size }` where `cid` is a `CID`, `path` is a string, `unixfs` is a UnixFS entry and `size` is a `Number`.
+
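+As a rough sketch - and assuming `source` and `ipld` are set up as in the example above - a custom chunker that emits fixed 1024-byte chunks could be supplied like this (the built-in `fixed` chunker already covers this case, so `myChunker` is purely illustrative):
+
+```js
+// illustrative chunker matching the documented `async function * (source, options)` signature
+const myChunker = async function * (source, options) {
+  let buffer = Buffer.alloc(0)
+
+  for await (const chunk of source) {
+    buffer = Buffer.concat([buffer, chunk])
+
+    // emit fixed 1024-byte chunks
+    while (buffer.length >= 1024) {
+      yield buffer.slice(0, 1024)
+      buffer = buffer.slice(1024)
+    }
+  }
+
+  if (buffer.length) {
+    // emit any remaining bytes
+    yield buffer
+  }
+}
+
+for await (const entry of importer(source, ipld, { chunker: myChunker })) {
+  console.info(entry)
+}
+```
+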
+[ipld-resolver instance]: https://github.com/ipld/js-ipld-resolver
+[UnixFS]: https://github.com/ipfs/specs/tree/master/unixfs
+[IPLD]: https://github.com/ipld/js-ipld
+[CID]: https://github.com/multiformats/js-cid
+
+## Contribute
+
+Feel free to join in. All welcome. Open an [issue](https://github.com/ipfs/js-ipfs-unixfs-importer/issues)!
+
+This repository falls under the IPFS [Code of Conduct](https://github.com/ipfs/community/blob/master/code-of-conduct.md).
+
+[![](https://cdn.rawgit.com/jbenet/contribute-ipfs-gif/master/img/contribute.gif)](https://github.com/ipfs/community/blob/master/contributing.md)
+
+## License
+
+[MIT](LICENSE)
diff --git a/packages/ipfs-unixfs-importer/package.json b/packages/ipfs-unixfs-importer/package.json
new file mode 100644
index 00000000..48524778
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/package.json
@@ -0,0 +1,94 @@
+{
+ "name": "ipfs-unixfs-importer",
+ "version": "0.45.0",
+ "description": "JavaScript implementation of the UnixFs importer used by IPFS",
+ "leadMaintainer": "Alex Potsides ",
+ "main": "src/index.js",
+ "browser": {
+ "fs": false
+ },
+ "scripts": {
+ "test": "aegir test",
+ "test:node": "aegir test -t node",
+ "test:browser": "aegir test -t browser",
+ "test:webworker": "aegir test -t webworker",
+ "build": "aegir build",
+ "lint": "aegir lint",
+ "release": "aegir release",
+ "release-minor": "aegir release --type minor",
+ "release-major": "aegir release --type major",
+ "coverage": "nyc -s npm run test:node && nyc report --reporter=html",
+ "dep-check": "aegir dep-check"
+ },
+ "repository": {
+ "type": "git",
+ "url": "git+https://github.com/ipfs/js-ipfs-unixfs-importer.git"
+ },
+ "keywords": [
+ "IPFS"
+ ],
+ "license": "MIT",
+ "bugs": {
+ "url": "https://github.com/ipfs/js-ipfs-unixfs-importer/issues"
+ },
+ "engines": {
+ "node": ">=8.0.0",
+ "npm": ">=3.0.0"
+ },
+ "homepage": "https://github.com/ipfs/js-ipfs-unixfs-importer#readme",
+ "devDependencies": {
+ "aegir": "^20.0.0",
+ "chai": "^4.2.0",
+ "cids": "~0.7.1",
+ "deep-extend": "~0.6.0",
+ "detect-node": "^2.0.4",
+ "dirty-chai": "^2.0.1",
+ "ipfs-unixfs-exporter": "^0.40.0",
+ "ipld": "^0.25.0",
+ "ipld-in-memory": "^3.0.0",
+ "it-buffer-stream": "^1.0.0",
+ "it-last": "^1.0.0",
+ "nyc": "^15.0.0",
+ "sinon": "^8.0.4"
+ },
+ "dependencies": {
+ "bl": "^4.0.0",
+ "err-code": "^2.0.0",
+ "hamt-sharding": "^1.0.0",
+ "ipfs-unixfs": "^0.3.0",
+ "ipld-dag-pb": "^0.18.0",
+ "it-all": "^1.0.1",
+ "it-batch": "^1.0.3",
+ "it-first": "^1.0.1",
+ "it-parallel-batch": "^1.0.3",
+ "merge-options": "^2.0.0",
+ "multicodec": "^1.0.0",
+ "multihashing-async": "^0.8.0",
+ "rabin-wasm": "~0.0.8"
+ },
+ "contributors": [
+ "Alan Shaw ",
+ "Alan Shaw ",
+ "Alex Potsides ",
+ "Arpit Agarwal ",
+ "Bernard Mordan ",
+ "Dan Ordille ",
+ "David Dias ",
+ "Diogo Silva ",
+ "Francisco Baio Dias ",
+ "Friedel Ziegelmayer ",
+ "Greenkeeper ",
+ "Hugo Dias ",
+ "Hugo Dias ",
+ "Marcin Rataj ",
+ "Pedro Teixeira ",
+ "Richard Littauer ",
+ "Richard Schneider ",
+ "Stephen Whitmore ",
+ "Volker Mische ",
+ "greenkeeper[bot] ",
+ "jbenet ",
+ "nginnever ",
+ "ᴠɪᴄᴛᴏʀ ʙᴊᴇʟᴋʜᴏʟᴍ "
+ ]
+}
diff --git a/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js b/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js
new file mode 100644
index 00000000..6cef6606
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/chunker/fixed-size.js
@@ -0,0 +1,39 @@
+'use strict'
+
+const BufferList = require('bl/BufferList')
+
+module.exports = async function * fixedSizeChunker (source, options) {
+ let bl = new BufferList()
+ let currentLength = 0
+ let emitted = false
+ const maxChunkSize = options.maxChunkSize
+
+ for await (const buffer of source) {
+ bl.append(buffer)
+
+ currentLength += buffer.length
+
+ while (currentLength >= maxChunkSize) {
+ yield bl.slice(0, maxChunkSize)
+ emitted = true
+
+ // throw away consumed bytes
+ if (maxChunkSize === bl.length) {
+ bl = new BufferList()
+ currentLength = 0
+ } else {
+ const newBl = new BufferList()
+ newBl.append(bl.shallowSlice(maxChunkSize))
+ bl = newBl
+
+ // update our offset
+ currentLength -= maxChunkSize
+ }
+ }
+ }
+
+ if (!emitted || currentLength) {
+ // return any remaining bytes or an empty buffer
+ yield bl.slice(0, currentLength)
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/src/chunker/index.js b/packages/ipfs-unixfs-importer/src/chunker/index.js
new file mode 100644
index 00000000..ec2c494b
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/chunker/index.js
@@ -0,0 +1,18 @@
+'use strict'
+
+const errCode = require('err-code')
+
+const chunkers = {
+ fixed: require('../chunker/fixed-size'),
+ rabin: require('../chunker/rabin')
+}
+
+module.exports = (type, source, options) => {
+ const chunker = chunkers[type]
+
+ if (!chunker) {
+ throw errCode(new Error(`Unknown chunker named ${type}`), 'ERR_UNKNOWN_CHUNKER')
+ }
+
+ return chunker(source, options)
+}
diff --git a/packages/ipfs-unixfs-importer/src/chunker/rabin.js b/packages/ipfs-unixfs-importer/src/chunker/rabin.js
new file mode 100644
index 00000000..6f1a0775
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/chunker/rabin.js
@@ -0,0 +1,73 @@
+'use strict'
+
+const BufferList = require('bl/BufferList')
+const { create } = require('rabin-wasm')
+const errcode = require('err-code')
+
+module.exports = async function * rabinChunker (source, options) {
+ const rabin = jsRabin()
+
+ let min, max, avg
+
+ if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) {
+ avg = options.avgChunkSize
+ min = options.minChunkSize
+ max = options.maxChunkSize
+ } else if (!options.avgChunkSize) {
+ throw errcode(new Error('please specify an average chunk size'), 'ERR_INVALID_AVG_CHUNK_SIZE')
+ } else {
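+ // derive min and max from the average chunk size when they are not given explicitly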
+ avg = options.avgChunkSize
+ min = avg / 3
+ max = avg + (avg / 2)
+ }
+
+ // validate min/max/avg in the same way as go
+ if (min < 16) {
+ throw errcode(new Error('rabin min must be greater than 16'), 'ERR_INVALID_MIN_CHUNK_SIZE')
+ }
+
+ if (max < min) {
+ max = min
+ }
+
+ if (avg < min) {
+ avg = min
+ }
+
+ const sizepow = Math.floor(Math.log2(avg))
+
+ for await (const chunk of rabin(source, {
+ min: min,
+ max: max,
+ bits: sizepow,
+ window: options.window,
+ polynomial: options.polynomial
+ })) {
+ yield chunk
+ }
+}
+
+const jsRabin = () => {
+ return async function * (source, options) {
+ const r = await create(options.bits, options.min, options.max, options.window)
+ const buffers = new BufferList()
+
+ for await (const chunk of source) {
+ buffers.append(chunk)
+
+ const sizes = r.fingerprint(chunk)
+
+ for (let i = 0; i < sizes.length; i++) {
+ var size = sizes[i]
+ var buf = buffers.slice(0, size)
+ buffers.consume(size)
+
+ yield buf
+ }
+ }
+
+ if (buffers.length) {
+ yield buffers.slice(0)
+ }
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/dir.js b/packages/ipfs-unixfs-importer/src/dag-builder/dir.js
new file mode 100644
index 00000000..42cce150
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/dir.js
@@ -0,0 +1,28 @@
+'use strict'
+
+const UnixFS = require('ipfs-unixfs')
+const persist = require('../utils/persist')
+const {
+ DAGNode
+} = require('ipld-dag-pb')
+
+const dirBuilder = async (item, ipld, options) => {
+ const unixfs = new UnixFS({
+ type: 'directory',
+ mtime: item.mtime,
+ mode: item.mode
+ })
+
+ const node = new DAGNode(unixfs.marshal(), [])
+ const cid = await persist(node, ipld, options)
+ const path = item.path
+
+ return {
+ cid,
+ path,
+ unixfs,
+ size: node.size
+ }
+}
+
+module.exports = dirBuilder
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js
new file mode 100644
index 00000000..732f7f76
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/balanced.js
@@ -0,0 +1,23 @@
+'use strict'
+
+const batch = require('it-batch')
+
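+// Builds a balanced DAG: leaves are reduced in batches of options.maxChildrenPerNode, then the resulting parents are reduced again until a single root remains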
+async function * balanced (source, reduce, options) {
+ yield await reduceToParents(source, reduce, options)
+}
+
+async function reduceToParents (source, reduce, options) {
+ const roots = []
+
+ for await (const chunked of batch(source, options.maxChildrenPerNode)) {
+ roots.push(await reduce(chunked))
+ }
+
+ if (roots.length > 1) {
+ return reduceToParents(roots, reduce, options)
+ }
+
+ return roots[0]
+}
+
+module.exports = balanced
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js
new file mode 100644
index 00000000..88d89bde
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/buffer-importer.js
@@ -0,0 +1,50 @@
+'use strict'
+
+const UnixFS = require('ipfs-unixfs')
+const persist = require('../../utils/persist')
+const {
+ DAGNode
+} = require('ipld-dag-pb')
+
+async function * bufferImporter (file, source, ipld, options) {
+ for await (const buffer of source) {
+ yield async () => {
+ options.progress(buffer.length)
+ let node
+ let unixfs
+ let size
+
+ const opts = {
+ ...options
+ }
+
+ if (options.rawLeaves) {
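+ // raw leaves are persisted as-is using the 'raw' codec and CIDv1, without a UnixFS wrapper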
+ node = buffer
+ size = buffer.length
+
+ opts.codec = 'raw'
+ opts.cidVersion = 1
+ } else {
+ unixfs = new UnixFS({
+ type: options.leafType,
+ data: buffer,
+ mtime: file.mtime,
+ mode: file.mode
+ })
+
+ node = new DAGNode(unixfs.marshal())
+ size = node.size
+ }
+
+ const cid = await persist(node, ipld, opts)
+
+ return {
+ cid: cid,
+ unixfs,
+ size
+ }
+ }
+ }
+}
+
+module.exports = bufferImporter
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js
new file mode 100644
index 00000000..1ac77ef6
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/flat.js
@@ -0,0 +1,7 @@
+'use strict'
+
+const all = require('it-all')
+
+module.exports = async function * (source, reduce) {
+ yield await reduce(await all(source))
+}
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js
new file mode 100644
index 00000000..f44c5e51
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/index.js
@@ -0,0 +1,147 @@
+'use strict'
+
+const errCode = require('err-code')
+const UnixFS = require('ipfs-unixfs')
+const persist = require('../../utils/persist')
+const {
+ DAGNode,
+ DAGLink
+} = require('ipld-dag-pb')
+const all = require('it-all')
+const parallelBatch = require('it-parallel-batch')
+const mc = require('multicodec')
+
+const dagBuilders = {
+ flat: require('./flat'),
+ balanced: require('./balanced'),
+ trickle: require('./trickle')
+}
+
+async function * buildFileBatch (file, source, ipld, options) {
+ let count = -1
+ let previous
+ let bufferImporter
+
+ if (typeof options.bufferImporter === 'function') {
+ bufferImporter = options.bufferImporter
+ } else {
+ bufferImporter = require('./buffer-importer')
+ }
+
+ for await (const entry of parallelBatch(bufferImporter(file, source, ipld, options), options.blockWriteConcurrency)) {
+ count++
+
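+ // hold back the first entry - if no further entries arrive it is marked 'single' below so the reducer can collapse it into the file root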
+ if (count === 0) {
+ previous = entry
+ continue
+ } else if (count === 1) {
+ yield previous
+ previous = null
+ }
+
+ yield entry
+ }
+
+ if (previous) {
+ previous.single = true
+ yield previous
+ }
+}
+
+const reduce = (file, ipld, options) => {
+ return async function (leaves) {
+ if (leaves.length === 1 && leaves[0].single && options.reduceSingleLeafToSelf) {
+ const leaf = leaves[0]
+
+ if (leaf.cid.codec === 'raw') {
+ // only one leaf node which is a buffer
+ const buffer = await ipld.get(leaf.cid)
+
+ leaf.unixfs = new UnixFS({
+ type: 'file',
+ mtime: file.mtime,
+ mode: file.mode,
+ data: buffer
+ })
+
+ const node = new DAGNode(leaf.unixfs.marshal())
+
+ leaf.cid = await ipld.put(node, mc.DAG_PB, options)
+ leaf.size = node.size
+ }
+
+ return {
+ cid: leaf.cid,
+ path: file.path,
+ unixfs: leaf.unixfs,
+ size: leaf.size
+ }
+ }
+
+ // create a parent node and add all the leaves
+ const f = new UnixFS({
+ type: 'file',
+ mtime: file.mtime,
+ mode: file.mode
+ })
+
+ const links = leaves
+ .filter(leaf => {
+ if (leaf.cid.codec === 'raw' && leaf.size) {
+ return true
+ }
+
+ if (!leaf.unixfs.data && leaf.unixfs.fileSize()) {
+ return true
+ }
+
+ return Boolean(leaf.unixfs.data.length)
+ })
+ .map((leaf) => {
+ if (leaf.cid.codec === 'raw') {
+ // node is a leaf buffer
+ f.addBlockSize(leaf.size)
+
+ return new DAGLink(leaf.name, leaf.size, leaf.cid)
+ }
+
+ if (!leaf.unixfs.data) {
+ // node is an intermediate node
+ f.addBlockSize(leaf.unixfs.fileSize())
+ } else {
+ // node is a unixfs 'file' leaf node
+ f.addBlockSize(leaf.unixfs.data.length)
+ }
+
+ return new DAGLink(leaf.name, leaf.size, leaf.cid)
+ })
+
+ const node = new DAGNode(f.marshal(), links)
+ const cid = await persist(node, ipld, options)
+
+ return {
+ cid,
+ path: file.path,
+ unixfs: f,
+ size: node.size
+ }
+ }
+}
+
+const fileBuilder = async (file, source, ipld, options) => {
+ const dagBuilder = dagBuilders[options.strategy]
+
+ if (!dagBuilder) {
+ throw errCode(new Error(`Unknown importer build strategy name: ${options.strategy}`), 'ERR_BAD_STRATEGY')
+ }
+
+ const roots = await all(dagBuilder(buildFileBatch(file, source, ipld, options), reduce(file, ipld, options), options))
+
+ if (roots.length > 1) {
+ throw errCode(new Error('expected a maximum of 1 root but got ' + roots.length), 'ETOOMANYROOTS')
+ }
+
+ return roots[0]
+}
+
+module.exports = fileBuilder
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js b/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js
new file mode 100644
index 00000000..5149ff0b
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/file/trickle.js
@@ -0,0 +1,150 @@
+'use strict'
+
+const batch = require('it-batch')
+
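+// Builds a trickle DAG: leaves are appended to a sequence of subtrees whose maximum depth grows by one every options.layerRepeat iterations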
+module.exports = function * trickleReduceToRoot (source, reduce, options) {
+ yield trickleStream(source, reduce, options)
+}
+
+async function trickleStream (source, reduce, options) {
+ let root
+ let iteration = 0
+ let maxDepth = 1
+ let subTree = root = new Root(options.layerRepeat)
+
+ for await (const layer of batch(source, options.maxChildrenPerNode)) {
+ if (subTree.isFull()) {
+ if (subTree !== root) {
+ root.addChild(await subTree.reduce(reduce))
+ }
+
+ if (iteration && iteration % options.layerRepeat === 0) {
+ maxDepth++
+ }
+
+ subTree = new SubTree(maxDepth, options.layerRepeat, iteration)
+
+ iteration++
+ }
+
+ subTree.append(layer)
+ }
+
+ if (subTree && subTree !== root) {
+ root.addChild(await subTree.reduce(reduce))
+ }
+
+ return root.reduce(reduce)
+}
+
+class SubTree {
+ constructor (maxDepth, layerRepeat, iteration) {
+ this.maxDepth = maxDepth
+ this.layerRepeat = layerRepeat
+ this.currentDepth = 1
+ this.iteration = iteration
+
+ this.root = this.node = this.parent = {
+ children: [],
+ depth: this.currentDepth,
+ maxDepth,
+ maxChildren: (this.maxDepth - this.currentDepth) * this.layerRepeat
+ }
+ }
+
+ isFull () {
+ if (!this.root.data) {
+ return false
+ }
+
+ if (this.currentDepth < this.maxDepth && this.node.maxChildren) {
+ // can descend
+ this._addNextNodeToParent(this.node)
+
+ return false
+ }
+
+ // try to find new node from node.parent
+ const distantRelative = this._findParent(this.node, this.currentDepth)
+
+ if (distantRelative) {
+ this._addNextNodeToParent(distantRelative)
+
+ return false
+ }
+
+ return true
+ }
+
+ _addNextNodeToParent (parent) {
+ this.parent = parent
+
+ // find site for new node
+ const nextNode = {
+ children: [],
+ depth: parent.depth + 1,
+ parent,
+ maxDepth: this.maxDepth,
+ maxChildren: Math.floor(parent.children.length / this.layerRepeat) * this.layerRepeat
+ }
+
+ parent.children.push(nextNode)
+
+ this.currentDepth = nextNode.depth
+ this.node = nextNode
+ }
+
+ append (layer) {
+ this.node.data = layer
+ }
+
+ reduce (reduce) {
+ return this._reduce(this.root, reduce)
+ }
+
+ async _reduce (node, reduce) {
+ let children = []
+
+ if (node.children.length) {
+ children = await Promise.all(
+ node.children
+ .filter(child => child.data)
+ .map(child => this._reduce(child, reduce))
+ )
+ }
+
+ return reduce(node.data.concat(children))
+ }
+
+ _findParent (node, depth) {
+ const parent = node.parent
+
+ if (!parent || parent.depth === 0) {
+ return
+ }
+
+ if (parent.children.length === parent.maxChildren || !parent.maxChildren) {
+ // this layer is full, may be able to traverse to a different branch
+ return this._findParent(parent, depth)
+ }
+
+ return parent
+ }
+}
+
+class Root extends SubTree {
+ constructor (layerRepeat) {
+ super(0, layerRepeat)
+
+ this.root.depth = 0
+ this.currentDepth = 1
+ }
+
+ addChild (child) {
+ this.root.children.push(child)
+ }
+
+ reduce (reduce) {
+ return reduce(this.root.data.concat(this.root.children))
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/index.js b/packages/ipfs-unixfs-importer/src/dag-builder/index.js
new file mode 100644
index 00000000..a55888d4
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/index.js
@@ -0,0 +1,58 @@
+'use strict'
+
+const dirBuilder = require('./dir')
+const fileBuilder = require('./file')
+
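+// Turns a stream of { path, content } entries into a stream of functions
+// that, when called, build the DAG for a single file or directory.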
+async function * dagBuilder (source, ipld, options) {
+ for await (const entry of source) {
+ if (entry.path) {
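+ // a path starting with './' means the result should be wrapped in a root directory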
+ if (entry.path.substring(0, 2) === './') {
+ options.wrapWithDirectory = true
+ }
+
+ entry.path = entry.path
+ .split('/')
+ .filter(path => path && path !== '.')
+ .join('/')
+ }
+
+ if (entry.content) {
+ let source = entry.content
+
+ // wrap the content in an iterator if it is array-like (e.g. a single Buffer) or not an iterable
+ if ((!source[Symbol.asyncIterator] && !source[Symbol.iterator]) || source.length !== undefined) {
+ source = {
+ [Symbol.iterator]: function * () {
+ yield entry.content
+ }
+ }
+ }
+
+ let chunker
+
+ if (typeof options.chunker === 'function') {
+ chunker = options.chunker
+ } else if (options.chunker === 'rabin') {
+ chunker = require('../chunker/rabin')
+ } else {
+ chunker = require('../chunker/fixed-size')
+ }
+
+ let chunkValidator
+
+ if (typeof options.chunkValidator === 'function') {
+ chunkValidator = options.chunkValidator
+ } else {
+ chunkValidator = require('./validate-chunks')
+ }
+
+ // item is a file
+ yield () => fileBuilder(entry, chunker(chunkValidator(source, options), options), ipld, options)
+ } else {
+ // item is a directory
+ yield () => dirBuilder(entry, ipld, options)
+ }
+ }
+}
+
+module.exports = dagBuilder
diff --git a/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js b/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js
new file mode 100644
index 00000000..bf3037d3
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dag-builder/validate-chunks.js
@@ -0,0 +1,22 @@
+'use strict'
+
+const errCode = require('err-code')
+
+// make sure the content only emits Buffer-like values
+async function * validateChunks (source) {
+ for await (const content of source) {
+ if (content.length === undefined) {
+ throw errCode(new Error('Content was invalid'), 'ERR_INVALID_CONTENT')
+ }
+
+ if (typeof content === 'string' || content instanceof String) {
+ yield Buffer.from(content, 'utf8')
+ } else if (Array.isArray(content)) {
+ yield Buffer.from(content)
+ } else {
+ yield content
+ }
+ }
+}
+
+module.exports = validateChunks
diff --git a/packages/ipfs-unixfs-importer/src/dir-flat.js b/packages/ipfs-unixfs-importer/src/dir-flat.js
new file mode 100644
index 00000000..50866044
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dir-flat.js
@@ -0,0 +1,92 @@
+'use strict'
+
+const {
+ DAGLink,
+ DAGNode
+} = require('ipld-dag-pb')
+const UnixFS = require('ipfs-unixfs')
+const Dir = require('./dir')
+const persist = require('./utils/persist')
+
+class DirFlat extends Dir {
+ constructor (props, options) {
+ super(props, options)
+ this._children = {}
+ }
+
+ put (name, value) {
+ this.cid = undefined
+ this.size = undefined
+
+ this._children[name] = value
+ }
+
+ get (name) {
+ return this._children[name]
+ }
+
+ childCount () {
+ return Object.keys(this._children).length
+ }
+
+ directChildrenCount () {
+ return this.childCount()
+ }
+
+ onlyChild () {
+ return this._children[Object.keys(this._children)[0]]
+ }
+
+ * eachChildSeries () {
+ const keys = Object.keys(this._children)
+
+ for (let i = 0; i < keys.length; i++) {
+ const key = keys[i]
+
+ yield {
+ key: key,
+ child: this._children[key]
+ }
+ }
+ }
+
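+ // Flush any nested directories first (yielding every node that gets
+ // persisted along the way), then persist this directory node and yield it.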
+ async * flush (path, ipld) {
+ const children = Object.keys(this._children)
+ const links = []
+
+ for (let i = 0; i < children.length; i++) {
+ let child = this._children[children[i]]
+
+ if (typeof child.flush === 'function') {
+ for await (const entry of child.flush(child.path, ipld)) {
+ child = entry
+
+ yield child
+ }
+ }
+
+ links.push(new DAGLink(children[i], child.size, child.cid))
+ }
+
+ const unixfs = new UnixFS({
+ type: 'directory',
+ mtime: this.mtime,
+ mode: this.mode
+ })
+
+ const node = new DAGNode(unixfs.marshal(), links)
+ const cid = await persist(node, ipld, this.options)
+
+ this.cid = cid
+ this.size = node.size
+
+ yield {
+ cid,
+ unixfs,
+ path,
+ size: node.size
+ }
+ }
+}
+
+module.exports = DirFlat
diff --git a/packages/ipfs-unixfs-importer/src/dir-sharded.js b/packages/ipfs-unixfs-importer/src/dir-sharded.js
new file mode 100644
index 00000000..e2959845
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dir-sharded.js
@@ -0,0 +1,162 @@
+'use strict'
+
+const {
+ DAGLink,
+ DAGNode
+} = require('ipld-dag-pb')
+const UnixFS = require('ipfs-unixfs')
+const multihashing = require('multihashing-async')
+const Dir = require('./dir')
+const persist = require('./utils/persist')
+const Bucket = require('hamt-sharding')
+const mergeOptions = require('merge-options').bind({ ignoreUndefined: true })
+
+const hashFn = async function (value) {
+ const hash = await multihashing(Buffer.from(value, 'utf8'), 'murmur3-128')
+
+ // Multihashing inserts a preamble of 2 bytes. Remove it.
+ // Also, murmur3 outputs 128 bits but, accidentally, the go-ipfs
+ // implementation only uses the first 64, so we must do the same
+ // for parity.
+ const justHash = hash.slice(2, 10)
+ const length = justHash.length
+ const result = Buffer.alloc(length)
+ // TODO: invert buffer because that's how Go impl does it
+ for (let i = 0; i < length; i++) {
+ result[length - i - 1] = justHash[i]
+ }
+
+ return result
+}
+hashFn.code = 0x22 // TODO: get this from multihashing-async?
+
+const defaultOptions = {
+ hamtHashFn: hashFn,
+ hamtBucketBits: 8
+}
+
+class DirSharded extends Dir {
+ constructor (props, options) {
+ options = mergeOptions(defaultOptions, options)
+
+ super(props, options)
+
+ this._bucket = Bucket({
+ hashFn: options.hamtHashFn,
+ bits: options.hamtBucketBits
+ })
+ }
+
+ async put (name, value) {
+ await this._bucket.put(name, value)
+ }
+
+ get (name) {
+ return this._bucket.get(name)
+ }
+
+ childCount () {
+ return this._bucket.leafCount()
+ }
+
+ directChildrenCount () {
+ return this._bucket.childrenCount()
+ }
+
+ onlyChild () {
+ return this._bucket.onlyChild()
+ }
+
+ async * eachChildSeries () {
+ for await (const { key, value } of this._bucket.eachLeafSeries()) {
+ yield {
+ key,
+ child: value
+ }
+ }
+ }
+
+ async * flush (path, ipld) {
+ for await (const entry of flush(path, this._bucket, ipld, this, this.options)) {
+ yield entry
+ }
+ }
+}
+
+module.exports = DirSharded
+
+module.exports.hashFn = hashFn
+
+async function * flush (path, bucket, ipld, shardRoot, options) {
+ const children = bucket._children
+ const links = []
+
+ for (let i = 0; i < children.length; i++) {
+ const child = children.get(i)
+
+ if (!child) {
+ continue
+ }
+
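+ // link names are prefixed with the bucket position as two uppercase hex characters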
+ const labelPrefix = i.toString(16).toUpperCase().padStart(2, '0')
+
+ if (Bucket.isBucket(child)) {
+ let shard
+
+ for await (const subShard of await flush('', child, ipld, null, options)) {
+ shard = subShard
+ }
+
+ links.push(new DAGLink(labelPrefix, shard.size, shard.cid))
+ } else if (typeof child.value.flush === 'function') {
+ const dir = child.value
+ let flushedDir
+
+ for await (const entry of dir.flush(dir.path, ipld)) {
+ flushedDir = entry
+
+ yield flushedDir
+ }
+
+ const label = labelPrefix + child.key
+ links.push(new DAGLink(label, flushedDir.size, flushedDir.cid))
+ } else {
+ const value = child.value
+
+ if (!value.node) {
+ if (value.cid) {
+ value.node = await ipld.get(value.cid)
+ } else {
+ continue
+ }
+ }
+
+ const label = labelPrefix + child.key
+ const size = value.node.length || value.node.size || value.node.Size
+
+ links.push(new DAGLink(label, size, value.cid))
+ }
+ }
+
+ // go-ipfs uses little endian, that's why we have to
+ // reverse the bit field before storing it
+ const data = Buffer.from(children.bitField().reverse())
+ const dir = new UnixFS({
+ type: 'hamt-sharded-directory',
+ data,
+ fanout: bucket.tableSize(),
+ hashType: options.hamtHashFn.code,
+ mtime: shardRoot && shardRoot.mtime,
+ mode: shardRoot && shardRoot.mode
+ })
+
+ const node = new DAGNode(dir.marshal(), links)
+ const cid = await persist(node, ipld, options)
+
+ yield {
+ cid,
+ unixfs: dir,
+ path,
+ size: node.size
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/src/dir.js b/packages/ipfs-unixfs-importer/src/dir.js
new file mode 100644
index 00000000..24a1023c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/dir.js
@@ -0,0 +1,8 @@
+'use strict'
+
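+// Base class for DirFlat and DirSharded - carries the entry props (path,
+// parent, mtime, mode, etc) plus the importer options.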
+module.exports = class Dir {
+ constructor (props, options) {
+ this.options = options || {}
+ Object.assign(this, props)
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/src/flat-to-shard.js b/packages/ipfs-unixfs-importer/src/flat-to-shard.js
new file mode 100644
index 00000000..1617b4dc
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/flat-to-shard.js
@@ -0,0 +1,47 @@
+'use strict'
+
+const DirSharded = require('./dir-sharded')
+
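+// Converts a flat directory into a HAMT shard once its direct child count
+// reaches the threshold, then repeats the check on each parent up to the root.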
+module.exports = async function flatToShard (child, dir, threshold, options) {
+ let newDir = dir
+
+ if (dir.flat && dir.directChildrenCount() >= threshold) {
+ newDir = await convertToShard(dir, options)
+ }
+
+ const parent = newDir.parent
+
+ if (parent) {
+ if (newDir !== dir) {
+ if (child) {
+ child.parent = newDir
+ }
+
+ await parent.put(newDir.parentKey, newDir)
+ }
+
+ return flatToShard(newDir, parent, threshold, options)
+ }
+
+ return newDir
+}
+
+async function convertToShard (oldDir, options) {
+ const newDir = new DirSharded({
+ root: oldDir.root,
+ dir: true,
+ parent: oldDir.parent,
+ parentKey: oldDir.parentKey,
+ path: oldDir.path,
+ dirty: oldDir.dirty,
+ flat: false,
+ mtime: oldDir.mtime,
+ mode: oldDir.mode
+ }, options)
+
+ for await (const { key, child } of oldDir.eachChildSeries()) {
+ await newDir.put(key, child)
+ }
+
+ return newDir
+}
diff --git a/packages/ipfs-unixfs-importer/src/index.js b/packages/ipfs-unixfs-importer/src/index.js
new file mode 100644
index 00000000..052acff3
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/index.js
@@ -0,0 +1,85 @@
+'use strict'
+
+const parallelBatch = require('it-parallel-batch')
+const mergeOptions = require('merge-options').bind({ ignoreUndefined: true })
+
+const defaultOptions = {
+ chunker: 'fixed',
+ strategy: 'balanced', // 'flat', 'trickle'
+ rawLeaves: false,
+ onlyHash: false,
+ reduceSingleLeafToSelf: true,
+ codec: 'dag-pb',
+ hashAlg: 'sha2-256',
+ leafType: 'file', // 'raw'
+ cidVersion: 0,
+ progress: () => () => {},
+ shardSplitThreshold: 1000,
+ fileImportConcurrency: 50,
+ blockWriteConcurrency: 10,
+ minChunkSize: 262144,
+ maxChunkSize: 262144,
+ avgChunkSize: 262144,
+ window: 16,
+ polynomial: 17437180132763653, // https://github.com/ipfs/go-ipfs-chunker/blob/d0125832512163708c0804a3cda060e21acddae4/rabin.go#L11
+ maxChildrenPerNode: 174,
+ layerRepeat: 4,
+ wrapWithDirectory: false,
+ pin: true,
+ recursive: false,
+ hidden: false,
+ preload: true,
+ chunkValidator: null,
+ importBuffer: null
+}
+
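+// Takes an (async) iterable of { path, content } entries, builds the file
+// DAGs with bounded concurrency and passes the results through the tree
+// builder, yielding the imported entries (directories after their contents).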
+module.exports = async function * (source, ipld, options = {}) {
+ const opts = mergeOptions(defaultOptions, options)
+
+ if (options.cidVersion > 0 && options.rawLeaves === undefined) {
+ // if the cid version is 1 or above, use raw leaves as this is
+ // what go does.
+ opts.rawLeaves = true
+ }
+
+ if (options.hashAlg !== undefined && options.rawLeaves === undefined) {
+ // if a non-default hash alg has been specified, use raw leaves as this is
+ // what go does.
+ opts.rawLeaves = true
+ }
+
+ // go-ipfs trickle dag defaults to unixfs raw leaves, balanced dag defaults to file leaves
+ if (options.strategy === 'trickle') {
+ opts.leafType = 'raw'
+ opts.reduceSingleLeafToSelf = false
+ }
+
+ if (options.format) {
+ opts.codec = options.format
+ }
+
+ let dagBuilder
+
+ if (typeof options.dagBuilder === 'function') {
+ dagBuilder = options.dagBuilder
+ } else {
+ dagBuilder = require('./dag-builder')
+ }
+
+ let treeBuilder
+
+ if (typeof options.treeBuilder === 'function') {
+ treeBuilder = options.treeBuilder
+ } else {
+ treeBuilder = require('./tree-builder')
+ }
+
+ for await (const entry of treeBuilder(parallelBatch(dagBuilder(source, ipld, opts), opts.fileImportConcurrency), ipld, opts)) {
+ yield {
+ cid: entry.cid,
+ path: entry.path,
+ unixfs: entry.unixfs,
+ size: entry.size
+ }
+ }
+}
diff --git a/packages/ipfs-unixfs-importer/src/tree-builder.js b/packages/ipfs-unixfs-importer/src/tree-builder.js
new file mode 100644
index 00000000..feb9f42d
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/tree-builder.js
@@ -0,0 +1,100 @@
+'use strict'
+
+const DirFlat = require('./dir-flat')
+const flatToShard = require('./flat-to-shard')
+const Dir = require('./dir')
+const toPathComponents = require('./utils/to-path-components')
+const errCode = require('err-code')
+const first = require('it-first')
+
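+// Adds an entry to the directory tree, creating intermediate directories as
+// needed and converting any directory that crosses shardSplitThreshold into
+// a HAMT shard.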
+async function addToTree (elem, tree, options) {
+ const pathElems = toPathComponents(elem.path || '')
+ const lastIndex = pathElems.length - 1
+ let parent = tree
+ let currentPath = ''
+
+ for (let i = 0; i < pathElems.length; i++) {
+ const pathElem = pathElems[i]
+
+ currentPath += `${currentPath ? '/' : ''}${pathElem}`
+
+ const last = (i === lastIndex)
+ parent.dirty = true
+ parent.cid = null
+ parent.size = null
+
+ if (last) {
+ await parent.put(pathElem, elem)
+ tree = await flatToShard(null, parent, options.shardSplitThreshold, options)
+ } else {
+ let dir = await parent.get(pathElem)
+
+ if (!dir || !(dir instanceof Dir)) {
+ dir = new DirFlat({
+ dir: true,
+ parent: parent,
+ parentKey: pathElem,
+ path: currentPath,
+ dirty: true,
+ flat: true,
+ mtime: dir && dir.unixfs && dir.unixfs.mtime,
+ mode: dir && dir.unixfs && dir.unixfs.mode
+ }, options)
+ }
+
+ await parent.put(pathElem, dir)
+
+ parent = dir
+ }
+ }
+
+ return tree
+}
+
+async function * treeBuilder (source, ipld, options) {
+ let tree = new DirFlat({
+ root: true,
+ dir: true,
+ path: '',
+ dirty: true,
+ flat: true
+ }, options)
+
+ for await (const entry of source) {
+ if (!entry) {
+ continue
+ }
+
+ tree = await addToTree(entry, tree, options)
+
+ if (!entry.unixfs || !entry.unixfs.isDirectory()) {
+ yield entry
+ }
+ }
+
+ if (!options.wrapWithDirectory) {
+ if (tree.childCount() > 1) {
+ throw errCode(new Error('detected more than one root'), 'ERR_MORE_THAN_ONE_ROOT')
+ }
+
+ const unwrapped = await first(tree.eachChildSeries())
+
+ if (!unwrapped) {
+ return
+ }
+
+ tree = unwrapped.child
+ }
+
+ if (!(tree instanceof Dir)) {
+ if (tree && tree.unixfs && tree.unixfs.isDirectory()) {
+ yield tree
+ }
+
+ return
+ }
+
+ yield * tree.flush(tree.path, ipld)
+}
+
+module.exports = treeBuilder
diff --git a/packages/ipfs-unixfs-importer/src/utils/persist.js b/packages/ipfs-unixfs-importer/src/utils/persist.js
new file mode 100644
index 00000000..e6970b65
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/utils/persist.js
@@ -0,0 +1,33 @@
+'use strict'
+
+const mh = require('multihashing-async').multihash
+const mc = require('multicodec')
+
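+// Serialises a node via ipld.put: raw leaves (plain buffers) get CIDv1 and
+// the 'raw' codec, everything else defaults to 'dag-pb', and any hash other
+// than sha2-256 forces CIDv1 since CIDv0 only supports sha2-256.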
+const persist = (node, ipld, options) => {
+ if (!options.codec && node.length) {
+ options.cidVersion = 1
+ options.codec = 'raw'
+ }
+
+ if (!options.codec) {
+ options.codec = 'dag-pb'
+ }
+
+ if (isNaN(options.hashAlg)) {
+ options.hashAlg = mh.names[options.hashAlg]
+ }
+
+ if (options.hashAlg !== mh.names['sha2-256']) {
+ options.cidVersion = 1
+ }
+
+ if (options.format) {
+ options.codec = options.format
+ }
+
+ const format = mc[options.codec.toUpperCase().replace(/-/g, '_')]
+
+ return ipld.put(node, format, options)
+}
+
+module.exports = persist
diff --git a/packages/ipfs-unixfs-importer/src/utils/to-path-components.js b/packages/ipfs-unixfs-importer/src/utils/to-path-components.js
new file mode 100644
index 00000000..5e826272
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/src/utils/to-path-components.js
@@ -0,0 +1,11 @@
+'use strict'
+
+const toPathComponents = (path = '') => {
+ // split on / unless escaped with \
+ return (path
+ .trim()
+ .match(/([^\\^/]|\\\/)+/g) || [])
+ .filter(Boolean)
+}
+
+module.exports = toPathComponents
diff --git a/packages/ipfs-unixfs-importer/test/benchmark.spec.js b/packages/ipfs-unixfs-importer/test/benchmark.spec.js
new file mode 100644
index 00000000..fae3f483
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/benchmark.spec.js
@@ -0,0 +1,70 @@
+/* eslint-env mocha */
+'use strict'
+
+const importer = require('../src')
+
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const bufferStream = require('it-buffer-stream')
+const all = require('it-all')
+
+const REPEATS = 10
+const FILE_SIZE = Math.pow(2, 20) * 500 // 500MiB
+const CHUNK_SIZE = 65536
+
+describe.skip('benchmark', function () {
+ this.timeout(30 * 1000)
+
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ const times = []
+
+ after(() => {
+ console.info('Percent\tms') // eslint-disable-line no-console
+ times.forEach((time, index) => {
+ console.info(`${index}\t${parseInt(time / REPEATS)}`) // eslint-disable-line no-console
+ })
+ })
+
+ for (let i = 0; i < REPEATS; i++) {
+ it(`run ${i}`, async () => { // eslint-disable-line no-loop-func
+ this.timeout(0)
+
+ const size = FILE_SIZE
+ let read = 0
+ let lastDate = Date.now()
+ let lastPercent = 0
+
+ const options = {
+ progress: (prog) => {
+ read += prog
+
+ const percent = parseInt((read / size) * 100)
+
+ if (percent > lastPercent) {
+ times[percent] = (times[percent] || 0) + (Date.now() - lastDate)
+
+ lastDate = Date.now()
+ lastPercent = percent
+ }
+ }
+ }
+
+ const buf = Buffer.alloc(CHUNK_SIZE).fill(0)
+
+ await all(importer([{
+ path: '200Bytes.txt',
+ content: bufferStream(size, {
+ chunkSize: CHUNK_SIZE,
+ generator: () => {
+ return buf
+ }
+ })
+ }], ipld, options))
+ })
+ }
+})
diff --git a/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js b/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js
new file mode 100644
index 00000000..17242a31
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/builder-balanced.spec.js
@@ -0,0 +1,70 @@
+/* eslint-env mocha */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const builder = require('../src/dag-builder/file/balanced')
+const all = require('it-all')
+
+function reduce (leaves) {
+ if (leaves.length > 1) {
+ return { children: leaves }
+ } else {
+ return leaves[0]
+ }
+}
+
+const options = {
+ maxChildrenPerNode: 3
+}
+
+describe('builder: balanced', () => {
+ it('reduces one value into itself', async () => {
+ const source = [1]
+
+ const result = await all(builder(source, reduce, options))
+
+ expect(result).to.deep.equal(source)
+ })
+
+ it('reduces 3 values into parent', async () => {
+ const source = [1, 2, 3]
+
+ const result = await all(builder(source, reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [1, 2, 3]
+ }])
+ })
+
+ it('obeys max children per node', async () => {
+ const source = [1, 2, 3, 4]
+
+ const result = await all(builder(source, reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [{
+ children: [1, 2, 3]
+ },
+ 4
+ ]
+ }])
+ })
+
+ it('refolds 2 parent nodes', async () => {
+ const source = [1, 2, 3, 4, 5, 6, 7]
+
+ const result = await all(builder(source, reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [{
+ children: [1, 2, 3]
+ }, {
+ children: [4, 5, 6]
+ },
+ 7
+ ]
+ }])
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js b/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js
new file mode 100644
index 00000000..b52b07b8
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/builder-dir-sharding.spec.js
@@ -0,0 +1,294 @@
+/* eslint-env mocha */
+'use strict'
+
+const importer = require('../src')
+const exporter = require('ipfs-unixfs-exporter')
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const all = require('it-all')
+const last = require('it-last')
+
+describe('builder: directory sharding', () => {
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ describe('basic dirbuilder', () => {
+ it('yields a non-sharded dir', async () => {
+ const content = Buffer.from('i have the best bytes')
+ const nodes = await all(importer([{
+ path: 'a/b',
+ content
+ }], ipld, {
+ shardSplitThreshold: Infinity // never shard
+ }))
+
+ expect(nodes.length).to.equal(2)
+
+ expect(nodes[0].path).to.equal('a/b')
+ expect(nodes[1].path).to.equal('a')
+
+ const dirNode = await exporter(nodes[1].cid, ipld)
+ expect(dirNode.unixfs.type).to.equal('directory')
+
+ const fileNode = await exporter(nodes[0].cid, ipld)
+ expect(fileNode.unixfs.type).to.equal('file')
+ expect(Buffer.concat(await all(fileNode.content()))).to.deep.equal(content)
+ })
+
+ it('yields a sharded dir', async () => {
+ const nodes = await all(importer([{
+ path: 'a/b',
+ content: Buffer.from('i have the best bytes')
+ }], ipld, {
+ shardSplitThreshold: 0 // always shard
+ }))
+
+ expect(nodes.length).to.equal(2)
+ expect(nodes[0].path).to.equal('a/b')
+ expect(nodes[1].path).to.equal('a')
+
+ const node = await exporter(nodes[1].cid, ipld)
+
+ expect(node.unixfs.type).to.equal('hamt-sharded-directory')
+ })
+
+ it('exporting unsharded hash results in the correct files', async () => {
+ const content = 'i have the best bytes'
+ const nodes = await all(importer([{
+ path: 'a/b',
+ content: Buffer.from(content)
+ }], ipld, {
+ shardSplitThreshold: Infinity // never shard
+ }))
+
+ const nonShardedHash = nodes[1].cid
+
+ const dir = await exporter(nonShardedHash, ipld)
+ const files = await all(dir.content())
+
+ expect(files.length).to.equal(1)
+
+ const expectedHash = nonShardedHash.toBaseEncodedString()
+
+ expect(dir.path).to.be.eql(expectedHash)
+ expect(dir.cid.toBaseEncodedString()).to.be.eql(expectedHash)
+ expect(files[0].path).to.be.eql(expectedHash + '/b')
+ expect(files[0].unixfs.fileSize()).to.be.eql(content.length)
+
+ const fileContent = Buffer.concat(await all(files[0].content()))
+
+ expect(fileContent.toString()).to.equal(content)
+ })
+
+ it('exporting sharded hash results in the correct files', async () => {
+ const content = 'i have the best bytes'
+ const nodes = await all(importer([{
+ path: 'a/b',
+ content: Buffer.from(content)
+ }], ipld, {
+ shardSplitThreshold: 0 // always shard
+ }))
+
+ const shardedHash = nodes[1].cid
+
+ const dir = await exporter(shardedHash, ipld)
+ const files = await all(dir.content())
+
+ expect(files.length).to.equal(1)
+
+ const expectedHash = shardedHash.toBaseEncodedString()
+
+ expect(dir.path).to.be.eql(expectedHash)
+ expect(dir.cid.toBaseEncodedString()).to.be.eql(expectedHash)
+ expect(files[0].path).to.be.eql(expectedHash + '/b')
+ expect(files[0].unixfs.fileSize()).to.be.eql(content.length)
+
+ const fileContent = Buffer.concat(await all(files[0].content()))
+
+ expect(fileContent.toString()).to.equal(content)
+ })
+ })
+
+ describe('big dir', function () {
+ this.timeout(30 * 1000)
+
+ const maxDirs = 2000
+
+ it('imports a big dir', async () => {
+ const source = {
+ [Symbol.iterator]: function * () {
+ for (let i = 0; i < maxDirs; i++) {
+ yield {
+ path: 'big/' + i.toString().padStart(4, '0'),
+ content: Buffer.from(i.toString())
+ }
+ }
+ }
+ }
+
+ const nodes = await all(importer(source, ipld))
+
+ expect(nodes.length).to.equal(maxDirs + 1)
+ const last = nodes[nodes.length - 1]
+ expect(last.path).to.equal('big')
+ })
+
+ it('exports a big dir', async () => {
+ const source = {
+ [Symbol.iterator]: function * () {
+ for (let i = 0; i < maxDirs; i++) {
+ yield {
+ path: 'big/' + i.toString().padStart(4, '0'),
+ content: Buffer.from(i.toString())
+ }
+ }
+ }
+ }
+
+ const nodes = await all(importer(source, ipld))
+
+ expect(nodes.length).to.equal(maxDirs + 1) // files plus the containing directory
+
+ const dir = await exporter(nodes[nodes.length - 1].cid, ipld)
+
+ for await (const entry of dir.content()) {
+ const content = Buffer.concat(await all(entry.content()))
+ expect(content.toString()).to.equal(parseInt(entry.name, 10).toString())
+ }
+ })
+ })
+
+ describe('big nested dir', function () {
+ this.timeout(450 * 1000)
+
+ const maxDirs = 2000
+ const maxDepth = 3
+ let rootHash
+
+ before(async () => {
+ const source = {
+ [Symbol.iterator]: function * () {
+ let pending = maxDirs
+ let pendingDepth = maxDepth
+ let i = 0
+ let depth = 1
+
+ while (pendingDepth && pending) {
+ i++
+ const dir = []
+
+ for (let d = 0; d < depth; d++) {
+ dir.push('big')
+ }
+
+ yield {
+ path: dir.concat(i.toString().padStart(4, '0')).join('/'),
+ content: Buffer.from(i.toString())
+ }
+
+ pending--
+ if (!pending) {
+ pendingDepth--
+ pending = maxDirs
+ i = 0
+ depth++
+ }
+ }
+ }
+ }
+
+ const node = await last(importer(source, ipld))
+ expect(node.path).to.equal('big')
+
+ rootHash = node.cid
+ })
+
+ it('imports a big dir', async () => {
+ const dir = await exporter(rootHash, ipld)
+
+ const verifyContent = async (node) => {
+ if (node.unixfs.type === 'file') {
+ const bufs = await all(node.content())
+ const content = Buffer.concat(bufs)
+ expect(content.toString()).to.equal(parseInt(node.name, 10).toString())
+ } else {
+ for await (const entry of node.content()) {
+ await verifyContent(entry)
+ }
+ }
+ }
+
+ await verifyContent(dir)
+ })
+
+ it('exports a big dir', async () => {
+ const collectContent = async (node, entries = {}) => {
+ if (node.unixfs.type === 'file') {
+ entries[node.path] = {
+ content: Buffer.concat(await all(node.content())).toString()
+ }
+ } else {
+ entries[node.path] = node
+
+ for await (const entry of node.content()) {
+ await collectContent(entry, entries)
+ }
+ }
+
+ return entries
+ }
+
+ const eachPath = (path) => {
+ if (!index) {
+ // first dir
+ if (depth === 1) {
+ expect(path).to.equal(dir.cid.toBaseEncodedString())
+ }
+
+ const entry = entries[path]
+ expect(entry).to.exist()
+ expect(entry.content).to.not.be.a('string')
+ } else {
+ // dir entries
+ const pathElements = path.split('/')
+ expect(pathElements.length).to.equal(depth + 1)
+ const lastElement = pathElements[pathElements.length - 1]
+ expect(lastElement).to.equal(index.toString().padStart(4, '0'))
+ expect(entries[path].content).to.equal(index.toString())
+ }
+ index++
+ if (index > maxDirs) {
+ index = 0
+ depth++
+ }
+ }
+
+ const dir = await exporter(rootHash, ipld)
+
+ const entries = await collectContent(dir)
+ let index = 0
+ let depth = 1
+
+ const paths = Object.keys(entries).sort()
+ expect(paths.length).to.equal(maxDepth * maxDirs + maxDepth)
+ paths.forEach(eachPath)
+ })
+
+ it('exports a big dir with subpath', async () => {
+ const exportHash = rootHash.toBaseEncodedString() + '/big/big/2000'
+
+ const node = await exporter(exportHash, ipld)
+ expect(node.path).to.equal(exportHash)
+
+ const content = Buffer.concat(await all(node.content()))
+ expect(content.toString()).to.equal('2000')
+ })
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/builder-flat.spec.js b/packages/ipfs-unixfs-importer/test/builder-flat.spec.js
new file mode 100644
index 00000000..e3f0339e
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/builder-flat.spec.js
@@ -0,0 +1,32 @@
+/* eslint-env mocha */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const builder = require('../src/dag-builder/file/flat')
+const all = require('it-all')
+
+function reduce (leaves) {
+ if (leaves.length > 1) {
+ return { children: leaves }
+ } else {
+ return leaves[0]
+ }
+}
+
+describe('builder: flat', () => {
+ it('reduces one value into itself', async () => {
+ const source = [1]
+ const result = await all(builder(source, reduce))
+
+ expect(result).to.be.eql([1])
+ })
+
+ it('reduces 2 values into parent', async () => {
+ const source = [1, 2]
+ const result = await all(builder(source, reduce))
+
+ expect(result).to.be.eql([{ children: [1, 2] }])
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js b/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js
new file mode 100644
index 00000000..e7e7642c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/builder-only-hash.spec.js
@@ -0,0 +1,47 @@
+/* eslint-env mocha */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const builder = require('../src/dag-builder')
+const all = require('it-all')
+
+describe('builder: onlyHash', () => {
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ it('will only chunk and hash if passed an "onlyHash" option', async () => {
+ const nodes = await all(builder([{
+ path: 'foo.txt',
+ content: Buffer.from([0, 1, 2, 3, 4])
+ }], ipld, {
+ onlyHash: true,
+ chunker: 'fixed',
+ strategy: 'balanced',
+ progress: () => {},
+ leafType: 'file',
+ reduceSingleLeafToSelf: true,
+ format: 'dag-pb',
+ hashAlg: 'sha2-256',
+ wrap: true,
+ maxChunkSize: 1024,
+ maxChildrenPerNode: 254
+ }))
+
+ expect(nodes.length).to.equal(1)
+
+ try {
+ await ipld.get((await nodes[0]()).cid)
+
+ throw new Error('Should have errored')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_NOT_FOUND')
+ }
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js b/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js
new file mode 100644
index 00000000..ba6c239a
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/builder-trickle-dag.spec.js
@@ -0,0 +1,574 @@
+/* eslint-env mocha */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const builder = require('../src/dag-builder/file/trickle')
+const all = require('it-all')
+
+const createValues = (max) => {
+ const output = []
+
+ for (let i = 0; i < max; i++) {
+ output.push(i)
+ }
+
+ return output
+}
+
+function reduce (leaves) {
+ if (leaves.length > 1) {
+ return { children: leaves }
+ } else {
+ return leaves[0]
+ }
+}
+
+const options = {
+ maxChildrenPerNode: 3,
+ layerRepeat: 2
+}
+
+describe('builder: trickle', () => {
+ it('reduces one value into itself', async () => {
+ const result = await all(builder([1], reduce, options))
+
+ expect(result).to.deep.equal([1])
+ })
+
+ it('reduces 3 values into parent', async () => {
+ const result = await all(builder(createValues(3), reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [
+ 0,
+ 1,
+ 2
+ ]
+ }])
+ })
+
+ it('reduces 6 values correctly', async () => {
+ const result = await all(builder(createValues(6), reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [
+ 0,
+ 1,
+ 2,
+ {
+ children: [
+ 3,
+ 4,
+ 5
+ ]
+ }
+ ]
+ }])
+ })
+
+ it('reduces 9 values correctly', async () => {
+ const result = await all(builder(createValues(9), reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [
+ 0,
+ 1,
+ 2,
+ {
+ children: [
+ 3,
+ 4,
+ 5
+ ]
+ },
+ {
+ children: [
+ 6,
+ 7,
+ 8
+ ]
+ }
+ ]
+ }])
+ })
+
+ it('reduces 12 values correctly', async () => {
+ const result = await all(builder(createValues(12), reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [
+ 0,
+ 1,
+ 2,
+ {
+ children: [
+ 3,
+ 4,
+ 5
+ ]
+ },
+ {
+ children: [
+ 6,
+ 7,
+ 8
+ ]
+ },
+ {
+ children: [
+ 9,
+ 10,
+ 11
+ ]
+ }
+ ]
+ }])
+ })
+
+ it('reduces 21 values correctly', async () => {
+ const result = await all(builder(createValues(21), reduce, options))
+
+ expect(result).to.deep.equal([{
+ children: [
+ 0,
+ 1,
+ 2,
+ {
+ children: [
+ 3,
+ 4,
+ 5
+ ]
+ },
+ {
+ children: [
+ 6,
+ 7,
+ 8
+ ]
+ },
+ {
+ children: [
+ 9,
+ 10,
+ 11,
+ {
+ children: [
+ 12,
+ 13,
+ 14
+ ]
+ },
+ {
+ children: [
+ 15,
+ 16,
+ 17
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 18,
+ 19,
+ 20
+ ]
+ }
+ ]
+ }])
+ })
+
+ it('reduces 68 values correctly', async () => {
+ const result = await all(builder(createValues(68), reduce, options))
+
+ expect(result).to.deep.equal([
+ {
+ children: [
+ 0,
+ 1,
+ 2,
+ {
+ children: [
+ 3,
+ 4,
+ 5
+ ]
+ },
+ {
+ children: [
+ 6,
+ 7,
+ 8
+ ]
+ },
+ {
+ children: [
+ 9,
+ 10,
+ 11,
+ {
+ children: [
+ 12,
+ 13,
+ 14
+ ]
+ },
+ {
+ children: [
+ 15,
+ 16,
+ 17
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 18,
+ 19,
+ 20,
+ {
+ children: [
+ 21,
+ 22,
+ 23
+ ]
+ },
+ {
+ children: [
+ 24,
+ 25,
+ 26
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 27,
+ 28,
+ 29,
+ {
+ children: [
+ 30,
+ 31,
+ 32
+ ]
+ },
+ {
+ children: [
+ 33,
+ 34,
+ 35
+ ]
+ },
+ {
+ children: [
+ 36,
+ 37,
+ 38,
+ {
+ children: [
+ 39,
+ 40,
+ 41
+ ]
+ },
+ {
+ children: [
+ 42,
+ 43,
+ 44
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 45,
+ 46,
+ 47,
+ {
+ children: [
+ 48,
+ 49,
+ 50
+ ]
+ },
+ {
+ children: [
+ 51,
+ 52,
+ 53
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 54,
+ 55,
+ 56,
+ {
+ children: [
+ 57,
+ 58,
+ 59
+ ]
+ },
+ {
+ children: [
+ 60,
+ 61,
+ 62
+ ]
+ },
+ {
+ children: [
+ 63,
+ 64,
+ 65,
+ {
+ children: [
+ 66,
+ 67
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ])
+ })
+
+ it('reduces 93 values correctly', async () => {
+ const result = await all(builder(createValues(93), reduce, options))
+
+ expect(result).to.deep.equal([
+ {
+ children: [
+ 0,
+ 1,
+ 2,
+ {
+ children: [
+ 3,
+ 4,
+ 5
+ ]
+ },
+ {
+ children: [
+ 6,
+ 7,
+ 8
+ ]
+ },
+ {
+ children: [
+ 9,
+ 10,
+ 11,
+ {
+ children: [
+ 12,
+ 13,
+ 14
+ ]
+ },
+ {
+ children: [
+ 15,
+ 16,
+ 17
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 18,
+ 19,
+ 20,
+ {
+ children: [
+ 21,
+ 22,
+ 23
+ ]
+ },
+ {
+ children: [
+ 24,
+ 25,
+ 26
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 27,
+ 28,
+ 29,
+ {
+ children: [
+ 30,
+ 31,
+ 32
+ ]
+ },
+ {
+ children: [
+ 33,
+ 34,
+ 35
+ ]
+ },
+ {
+ children: [
+ 36,
+ 37,
+ 38,
+ {
+ children: [
+ 39,
+ 40,
+ 41
+ ]
+ },
+ {
+ children: [
+ 42,
+ 43,
+ 44
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 45,
+ 46,
+ 47,
+ {
+ children: [
+ 48,
+ 49,
+ 50
+ ]
+ },
+ {
+ children: [
+ 51,
+ 52,
+ 53
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 54,
+ 55,
+ 56,
+ {
+ children: [
+ 57,
+ 58,
+ 59
+ ]
+ },
+ {
+ children: [
+ 60,
+ 61,
+ 62
+ ]
+ },
+ {
+ children: [
+ 63,
+ 64,
+ 65,
+ {
+ children: [
+ 66,
+ 67,
+ 68
+ ]
+ },
+ {
+ children: [
+ 69,
+ 70,
+ 71
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 72,
+ 73,
+ 74,
+ {
+ children: [
+ 75,
+ 76,
+ 77
+ ]
+ },
+ {
+ children: [
+ 78,
+ 79,
+ 80
+ ]
+ }
+ ]
+ }
+ ]
+ },
+ {
+ children: [
+ 81,
+ 82,
+ 83,
+ {
+ children: [
+ 84,
+ 85,
+ 86
+ ]
+ },
+ {
+ children: [
+ 87,
+ 88,
+ 89
+ ]
+ },
+ {
+ children: [
+ 90,
+ 91,
+ 92
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ ])
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/builder.spec.js b/packages/ipfs-unixfs-importer/test/builder.spec.js
new file mode 100644
index 00000000..c9d5522d
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/builder.spec.js
@@ -0,0 +1,108 @@
+/* eslint-env mocha */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const mh = require('multihashing-async').multihash
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const UnixFS = require('ipfs-unixfs')
+const builder = require('../src/dag-builder')
+const first = require('it-first')
+
+describe('builder', () => {
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ const testMultihashes = Object.keys(mh.names).slice(1, 40)
+ const opts = {
+ strategy: 'flat',
+ chunker: 'fixed',
+ leafType: 'file',
+ reduceSingleLeafToSelf: true,
+ format: 'dag-pb',
+ hashAlg: 'sha2-256',
+ progress: () => {},
+ maxChunkSize: 262144
+ }
+
+ it('allows multihash hash algorithm to be specified', async () => {
+ for (let i = 0; i < testMultihashes.length; i++) {
+ const hashAlg = testMultihashes[i]
+ const options = {
+ ...opts,
+ hashAlg
+ }
+ const content = String(Math.random() + Date.now())
+ const inputFile = {
+ path: content + '.txt',
+ content: Buffer.from(content)
+ }
+
+ const imported = await (await first(builder([inputFile], ipld, options)))()
+
+ expect(imported).to.exist()
+
+ // Verify multihash has been encoded using hashAlg
+ expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg)
+
+ // Fetch using hashAlg encoded multihash
+ const node = await ipld.get(imported.cid)
+
+ const fetchedContent = UnixFS.unmarshal(node.Data).data
+ expect(fetchedContent).to.deep.equal(inputFile.content)
+ }
+ })
+
+ it('allows multihash hash algorithm to be specified for big file', async function () {
+ this.timeout(30000)
+
+ for (let i = 0; i < testMultihashes.length; i++) {
+ const hashAlg = testMultihashes[i]
+ const options = {
+ ...opts,
+ hashAlg
+ }
+ const content = String(Math.random() + Date.now())
+ const inputFile = {
+ path: content + '.txt',
+ // Bigger than maxChunkSize
+ content: Buffer.alloc(262144 + 5).fill(1)
+ }
+
+ const imported = await (await first(builder([inputFile], ipld, options)))()
+
+ expect(imported).to.exist()
+ expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg)
+ }
+ })
+
+ it('allows multihash hash algorithm to be specified for a directory', async () => {
+ for (let i = 0; i < testMultihashes.length; i++) {
+ const hashAlg = testMultihashes[i]
+
+ const options = {
+ ...opts,
+ hashAlg
+ }
+ const inputFile = {
+ path: `${String(Math.random() + Date.now())}-dir`,
+ content: null
+ }
+
+ const imported = await (await first(builder([Object.assign({}, inputFile)], ipld, options)))()
+
+ expect(mh.decode(imported.cid.multihash).name).to.equal(hashAlg)
+
+ // Fetch using hashAlg encoded multihash
+ const node = await ipld.get(imported.cid)
+
+ const meta = UnixFS.unmarshal(node.Data)
+ expect(meta.type).to.equal('directory')
+ }
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js b/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js
new file mode 100644
index 00000000..8bc80dda
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/chunker-custom.spec.js
@@ -0,0 +1,73 @@
+/* eslint-env mocha */
+'use strict'
+
+const importer = require('../src')
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const mc = require('multicodec')
+
+// eslint bug https://github.com/eslint/eslint/issues/12459
+// eslint-disable-next-line require-await
+const iter = async function * () {
+ yield Buffer.from('one')
+ yield Buffer.from('two')
+}
+
+describe('custom chunker', function () {
+ let inmem
+
+ const fromPartsTest = (iter, size) => async () => {
+ for await (const part of importer([{
+ content: iter()
+ }], inmem, {
+ chunkValidator: source => source,
+ chunker: source => source,
+ bufferImporter: async function * (file, source, ipld, options) {
+ for await (const item of source) {
+ yield () => Promise.resolve(item)
+ }
+ }
+ })) {
+ expect(part.size).to.equal(size)
+ }
+ }
+
+ before(async () => {
+ inmem = await inMemory(IPLD)
+ })
+
+ it('keeps custom chunking', async () => {
+ const chunker = source => source
+ const content = iter()
+ for await (const part of importer([{ path: 'test', content }], inmem, {
+ chunker
+ })) {
+ expect(part.size).to.equal(116)
+ }
+ })
+
+ // eslint bug https://github.com/eslint/eslint/issues/12459
+ const multi = async function * () {
+ yield {
+ size: 11,
+ cid: await inmem.put(Buffer.from('hello world'), mc.RAW)
+ }
+ yield {
+ size: 11,
+ cid: await inmem.put(Buffer.from('hello world'), mc.RAW)
+ }
+ }
+ it('works with multiple parts', fromPartsTest(multi, 120))
+
+ const single = async function * () {
+ yield {
+ size: 11,
+ cid: await inmem.put(Buffer.from('hello world'), mc.RAW)
+ }
+ }
+ it('works with single part', fromPartsTest(single, 19))
+})
diff --git a/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js
new file mode 100644
index 00000000..276702ab
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/chunker-fixed-size.spec.js
@@ -0,0 +1,85 @@
+/* eslint-env mocha */
+'use strict'
+
+const chunker = require('../src/chunker/fixed-size')
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const isNode = require('detect-node')
+const all = require('it-all')
+const loadFixture = require('aegir/fixtures')
+const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt')
+
+describe('chunker: fixed size', function () {
+ this.timeout(30000)
+
+ it('chunks non flat buffers', async () => {
+ const b1 = Buffer.alloc(2 * 256)
+ const b2 = Buffer.alloc(1 * 256)
+ const b3 = Buffer.alloc(5 * 256)
+
+ b1.fill('a')
+ b2.fill('b')
+ b3.fill('c')
+
+ const chunks = await all(chunker([b1, b2, b3], {
+ maxChunkSize: 256
+ }))
+
+ expect(chunks).to.have.length(8)
+ chunks.forEach((chunk) => {
+ expect(chunk).to.have.length(256)
+ })
+ })
+
+ it('256 Bytes chunks', async () => {
+ const input = []
+ const buf = Buffer.from('a')
+
+ for (let i = 0; i < (256 * 12); i++) {
+ input.push(buf)
+ }
+ const chunks = await all(chunker(input, {
+ maxChunkSize: 256
+ }))
+
+ expect(chunks).to.have.length(12)
+ chunks.forEach((chunk) => {
+ expect(chunk).to.have.length(256)
+ })
+ })
+
+ it('256 KiB chunks', async () => {
+ const KiB256 = 262144
+ const chunks = await all(chunker([rawFile], {
+ maxChunkSize: KiB256
+ }))
+
+ expect(chunks).to.have.length(4)
+ chunks.forEach((chunk) => {
+ expect(chunk).to.have.length(KiB256)
+ })
+ })
+
+ it('256 KiB chunks of non scalar filesize', async () => {
+ const KiB256 = 262144
+ const file = Buffer.concat([rawFile, Buffer.from('hello')])
+
+ const chunks = await all(chunker([file], {
+ maxChunkSize: KiB256
+ }))
+
+ expect(chunks).to.have.length(5)
+ let counter = 0
+
+ chunks.forEach((chunk) => {
+ if (chunk.length < KiB256) {
+ counter++
+ } else {
+ expect(chunk).to.have.length(KiB256)
+ }
+ })
+
+ expect(counter).to.equal(1)
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js
new file mode 100644
index 00000000..9f9a4aff
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/chunker-rabin.spec.js
@@ -0,0 +1,135 @@
+/* eslint-env mocha */
+'use strict'
+
+const chunker = require('../src/chunker/rabin')
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const loadFixture = require('aegir/fixtures')
+const isNode = require('detect-node')
+const all = require('it-all')
+
+const rawFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1MiB.txt')
+
+describe('chunker: rabin', function () {
+ this.timeout(30000)
+
+ const defaultOptions = {
+ avgChunkSize: 262144,
+ window: 64,
+ polynomial: 17437180132763653
+ }
+
+ it('chunks non flat buffers', async () => {
+ const b1 = Buffer.alloc(2 * 256)
+ const b2 = Buffer.alloc(1 * 256)
+ const b3 = Buffer.alloc(5 * 256)
+
+ b1.fill('a')
+ b2.fill('b')
+ b3.fill('c')
+
+ const chunks = await all(chunker([b1, b2, b3], {
+ ...defaultOptions,
+ minChunkSize: 48,
+ avgChunkSize: 96,
+ maxChunkSize: 192
+ }))
+
+ const size = chunks.reduce((acc, curr) => acc + curr.length, 0)
+
+ expect(size).to.equal(b1.length + b2.length + b3.length)
+
+ chunks.forEach((chunk, index) => {
+ if (index === chunks.length - 1) {
+ expect(chunk.length).to.equal(128)
+ } else {
+ expect(chunk.length).to.equal(192)
+ }
+ })
+ })
+
+ it('uses default min and max chunk size when only avgChunkSize is specified', async () => {
+ const b1 = Buffer.alloc(10 * 256)
+ b1.fill('a')
+
+ const chunks = await all(chunker([b1], {
+ ...defaultOptions,
+ maxChunkSize: 262144,
+ minChunkSize: 18,
+ avgChunkSize: 256
+ }))
+
+ chunks.forEach((chunk) => {
+ expect(chunk).to.have.length.gte(256 / 3)
+ expect(chunk).to.have.length.lte(256 * (256 / 2))
+ })
+ })
+
+ it('256 KiB avg chunks of non scalar filesize', async () => {
+ const KiB256 = 262144
+ const file = Buffer.concat([rawFile, Buffer.from('hello')])
+ const opts = {
+ ...defaultOptions,
+ minChunkSize: KiB256 / 3,
+ avgChunkSize: KiB256,
+ maxChunkSize: KiB256 + (KiB256 / 2)
+ }
+
+ const chunks = await all(chunker([file], opts))
+
+ chunks.forEach((chunk) => {
+ expect(chunk).to.have.length.gte(opts.minChunkSize)
+ expect(chunk).to.have.length.lte(opts.maxChunkSize)
+ })
+ })
+
+ it('throws when min chunk size is too small', async () => {
+ const opts = {
+ ...defaultOptions,
+ minChunkSize: 1,
+ maxChunkSize: 100
+ }
+
+ try {
+ await all(chunker([], opts))
+ throw new Error('Should have thrown')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_INVALID_MIN_CHUNK_SIZE')
+ }
+ })
+
+ it('throws when avg chunk size is not specified', async () => {
+ const opts = {
+ ...defaultOptions,
+ avgChunkSize: undefined
+ }
+
+ try {
+ await all(chunker([], opts))
+ throw new Error('Should have thrown')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_INVALID_AVG_CHUNK_SIZE')
+ }
+ })
+
+ it('uses the min chunk size when max and avg are too small', async () => {
+ const file = Buffer.concat([rawFile, Buffer.from('hello')])
+ const opts = {
+ ...defaultOptions,
+ minChunkSize: 100,
+ maxChunkSize: 5,
+ avgChunkSize: 5
+ }
+
+ const chunks = await all(chunker([file], opts))
+
+ chunks.forEach((chunk, index) => {
+ if (index === chunks.length - 1) {
+ expect(chunk.length).to.equal(81)
+ } else {
+ expect(chunk.length).to.equal(100)
+ }
+ })
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt
new file mode 100644
index 00000000..6e306c55
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block
new file mode 100644
index 00000000..f57749f0
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.block differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0
new file mode 100644
index 00000000..a6e00f34
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block0 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1
new file mode 100644
index 00000000..f4c039c2
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block1 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2
new file mode 100644
index 00000000..64ce0aeb
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block2 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3
new file mode 100644
index 00000000..c1f9899a
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block3 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4
new file mode 100644
index 00000000..cbd601a6
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.link-block4 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file
new file mode 100644
index 00000000..e7229e0e
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-file
@@ -0,0 +1 @@
+L
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0
new file mode 100644
index 00000000..36ff3333
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw0 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1
new file mode 100644
index 00000000..fa626274
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw1 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2
new file mode 100644
index 00000000..f7ea5c2e
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw2 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3
new file mode 100644
index 00000000..de99ffe5
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw3 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4
new file mode 100644
index 00000000..0e438a15
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1.2MiB.txt.unixfs-raw4 differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt
new file mode 100644
index 00000000..60770c23
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/1MiB.txt differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt
new file mode 100644
index 00000000..d95023c7
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block
new file mode 100644
index 00000000..a655cf83
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.block
@@ -0,0 +1,5 @@
+
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file
new file mode 100644
index 00000000..b93a6da8
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/200Bytes.txt.unixfs-file
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block
new file mode 100644
index 00000000..ce734230
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-big.block
@@ -0,0 +1,4 @@
+4
+" si"¹W%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt
new file mode 100644
index 00000000..d95023c7
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-nested/level-1/200Bytes.txt
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block
new file mode 100644
index 00000000..5accb645
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.block
@@ -0,0 +1,3 @@
+5
+" $G,A4{xZ/.D`200Bytes.txt
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir
new file mode 100644
index 00000000..e19a122a
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small.unixfs-dir
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt
new file mode 100644
index 00000000..d95023c7
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/dir-small/200Bytes.txt
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/dir-with-empty-files/empty-file.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/empty.txt b/packages/ipfs-unixfs-importer/test/fixtures/empty.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt
new file mode 100644
index 00000000..6e306c55
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/foo-big/1.2MiB.txt differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt
new file mode 100644
index 00000000..d95023c7
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/foo/bar/200Bytes.txt
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv
new file mode 100644
index 00000000..55e83f48
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/ipfsmarket-1.ogv differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt
new file mode 100644
index 00000000..6e306c55
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pam/1.2MiB.txt differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt
new file mode 100644
index 00000000..6e306c55
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/1.2MiB.txt differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt
new file mode 100644
index 00000000..d95023c7
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/pam/pum/200Bytes.txt
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt
new file mode 100644
index 00000000..6e306c55
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/pim/1.2MiB.txt differ
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt
new file mode 100644
index 00000000..d95023c7
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/pim/200Bytes.txt
@@ -0,0 +1,4 @@
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/small.txt b/packages/ipfs-unixfs-importer/test/fixtures/small.txt
new file mode 100644
index 00000000..f81fce04
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/small.txt
@@ -0,0 +1 @@
+this is a file
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt
new file mode 100644
index 00000000..b3ab23d1
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt
@@ -0,0 +1,20361 @@
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still in use
+today. Others [7, ?] have not attained the same success.
+Outside of academia, the most successful systems have been
+peer-to-peer file-sharing applications primarily geared
+toward large media (audio and video). Most notably, Napster,
+KaZaA, and BitTorrent [2] deployed large file distribution
+systems supporting over 100 million simultaneous users. Even
+today, BitTorrent maintains a massive deployment where tens
+of millions of nodes churn daily [16]. These applications
+served more users and distributed more files than their
+academic file system counterparts. However, the applications
+were not designed as infrastructure to be built upon. While
+there have been successful repurposings¹, no general file
+system has emerged that offers global, low-latency, and
+decentralized distribution.
+
+¹ For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute video
+game content.
+
+Perhaps this is because a “good enough” system for most use
+cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed.
+Coupled with the browser, HTTP has had enormous technical and
+social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage of
+dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one perspective, evolving Web
+infrastructure is near-impossible, given the number of
+backwards compatibility constraints and the number of strong
+parties invested in the current model. But from another
+perspective, new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing new
+functionality without degrading user experience.
+
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for small
+organizations with lots of traffic. But we are entering a new
+era of data distribution with new challenges: (a) hosting and
+distributing petabyte datasets, (b) computing on large data
+across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning and
+linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many of these can
+be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns, we have
+already given up HTTP for different data distribution
+protocols. The next step is making them part of the Web
+itself.
+
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version control
+system, developed many useful ways to model and implement
+distributed data operations. The Git toolchain offers
+versatile versioning functionality that large file
+distribution systems severely lack. New solutions inspired by
+Git are emerging, such as Camlistore [?], a personal file
+storage system, and Dat [?], a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content-addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput-oriented
+file systems, and how it might upgrade the Web itself.
+
+This paper introduces IPFS, a novel peer-to-peer
+version-controlled filesystem seeking to reconcile these
+issues. IPFS synthesizes learnings from many past successful
+systems. Careful interface-focused integration yields a
+system greater than the sum of its parts. The central IPFS
+principle is modeling all data as part of the same Merkle DAG.
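+
+To make this last idea concrete, the sketch below is a minimal,
+hypothetical TypeScript illustration (not IPFS's or Git's actual
+object format) of content addressing in a Merkle DAG: a node's
+address is the hash of its serialized bytes, and links to children
+are stored by those hashes, so changing any child changes every
+ancestor's address.
+
+    import { createHash } from 'node:crypto'
+
+    // A hypothetical DAG node: opaque data plus named links, where each
+    // link holds the child's content address (its hash).
+    interface DagNode {
+      data: Uint8Array
+      links: { name: string, cid: string }[]
+    }
+
+    // The address is derived purely from content: identical content
+    // always yields the same address, and any change yields a new one.
+    function address (node: DagNode): string {
+      const bytes = JSON.stringify({
+        data: Buffer.from(node.data).toString('base64'),
+        links: node.links
+      })
+      return createHash('sha256').update(bytes).digest('hex')
+    }
+
+    // Example: a "directory" node linking a "file" node by hash.
+    const file: DagNode = { data: new TextEncoder().encode('hello'), links: [] }
+    const dir: DagNode = {
+      data: new Uint8Array(),
+      links: [{ name: 'hello.txt', cid: address(file) }]
+    }
+    console.log(address(dir))
+
+Because addresses are derived from content, any peer can verify a
+block it receives by re-hashing it, and identical content
+deduplicates automatically; these are the kinds of distribution
+properties the passage above attributes to the Merkle DAG model.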
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
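+
+The passage above refers to Git's content-addressed Merkle DAG
+model only in prose. As a rough, hypothetical illustration (not
+code from this paper or from any IPFS implementation), the Node.js
+sketch below shows the core idea: a block is named by the hash of
+its bytes, and a parent node links to its children by those hashes,
+so the root hash commits to the entire tree. The helper names
+(cid, makeNode) are invented for this example.
+
+'use strict'
+
+const crypto = require('crypto')
+
+// A block's identifier is simply the hash of its bytes.
+function cid (bytes) {
+  return crypto.createHash('sha256').update(bytes).digest('hex')
+}
+
+// A DAG node stores its own data plus the identifiers of its
+// children, so a parent's identifier commits to the whole subtree.
+function makeNode (data, children = []) {
+  const links = children.map(child => child.id)
+  const bytes = Buffer.from(JSON.stringify({ data, links }))
+  return { id: cid(bytes), data, links }
+}
+
+const leafA = makeNode('hello ')
+const leafB = makeNode('world')
+const root = makeNode('', [leafA, leafB])
+
+// Changing any leaf changes the root identifier, which is what
+// enables deduplication and integrity verification.
+console.log(root.id, root.links)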
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0 b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0
new file mode 100644
index 00000000..f9810363
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/fixtures/test-file.txt.link-block0
@@ -0,0 +1,4728 @@
+
+There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.
+
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.
+
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+
+Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.
+
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
+
+¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings [1], no general file system has emerged that offers global, low-latency, decentralized distribution.
+
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web and introducing new functionality without degrading user experience.
+
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume, high-definition, on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing the accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+
+Orthogonal to efficient data distribution, version control systems have developed important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput file systems, and how it might upgrade the Web itself.
+
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem that seeks to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
+
+[1] For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
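The fixture text above turns on one technical idea: a content-addressed Merkle DAG, in which every node is named by the hash of its own bytes and refers to its children by their hashes. As a rough, hypothetical illustration only (this is not the real CID or dag-pb encoding used by IPFS or by this package), a toy version might look like:

```js
'use strict'

// Toy content-addressed DAG sketch - hypothetical, for illustration only.
// Real IPFS uses multihash/CID and dag-pb/dag-cbor encodings instead.
const crypto = require('crypto')

const blocks = new Map() // "block store": id -> { data, links }

function put (data, links = []) {
  // A node's identifier is the hash of its own bytes plus its links,
  // so identical content always gets the same id (deduplication) and
  // changing any descendant changes every ancestor id (verifiability).
  const id = crypto.createHash('sha256')
    .update(data)
    .update(links.join(','))
    .digest('hex')

  blocks.set(id, { data, links })
  return id
}

const leafA = put(Buffer.from('hello '))
const leafB = put(Buffer.from('world'))
const root = put(Buffer.from(''), [leafA, leafB]) // a "file" made of two chunks

console.log(root, blocks.get(root).links)
```

Because identifiers are derived from content, identical chunks deduplicate automatically and any change in a leaf changes the root identifier.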
diff --git a/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv
new file mode 100644
index 00000000..55e83f48
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/fixtures/test-video.ogv differ
diff --git a/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js
new file mode 100644
index 00000000..94e44fda
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/hash-parity-with-go-ipfs.spec.js
@@ -0,0 +1,51 @@
+/* eslint-env mocha */
+'use strict'
+
+const importer = require('../src')
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const randomByteStream = require('./helpers/finite-pseudorandom-byte-stream')
+const first = require('it-first')
+
+const strategies = [
+ 'flat',
+ 'trickle',
+ 'balanced'
+]
+
+const expectedHashes = {
+ flat: 'QmeJ9FRWKnXZQiX5CM1E8j4gpGbg6otpgajThqsbnBpoyD',
+ balanced: 'QmRdPboiJQoZ5cdazR9a8vGqdJvWg6M5bfdtUSKNHpuscj',
+ trickle: 'QmdZcefqMZ3tzdS4CRBN5s1c67eS3nQzN8TNXFBYfgofoy'
+}
+
+strategies.forEach(strategy => {
+ const options = {
+ strategy: strategy
+ }
+
+ describe('go-ipfs interop using importer:' + strategy, () => {
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ it('yields the same tree as go-ipfs', async function () {
+ this.timeout(100 * 1000)
+
+ const source = [{
+ path: 'big.dat',
+ content: randomByteStream(45900000, 7382)
+ }]
+
+ const file = await first(importer(source, ipld, options))
+
+ expect(file.cid.toBaseEncodedString()).to.be.equal(expectedHashes[strategy])
+ })
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js b/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js
new file mode 100644
index 00000000..4ef6a4e9
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/helpers/collect-leaf-cids.js
@@ -0,0 +1,20 @@
+'use strict'
+
+module.exports = function (cid, ipld) {
+ async function * traverse (cid) {
+ const node = await ipld.get(cid)
+
+ if (Buffer.isBuffer(node) || !node.Links.length) {
+ yield {
+ node,
+ cid
+ }
+
+ return
+ }
+
+  // Recurse into child links, yielding their leaves (forEach would discard
+  // the returned async generators without ever running them)
+  for (const link of node.Links) {
+    yield * traverse(link.Hash)
+  }
+ }
+
+ return traverse(cid)
+}
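The helper above walks a file's DAG and yields its leaf blocks. A hypothetical way to drive it end-to-end, using the same in-memory IPLD setup the specs in this diff rely on (the file name and content are illustrative only), could be:

```js
'use strict'

// Hypothetical usage sketch of the collect-leaf-cids helper - assumes the
// in-memory IPLD setup used throughout these specs; names are illustrative.
const IPLD = require('ipld')
const inMemory = require('ipld-in-memory')
const all = require('it-all')
const first = require('it-first')
const importer = require('ipfs-unixfs-importer')
const collectLeafCids = require('./helpers/collect-leaf-cids')

async function main () {
  const ipld = await inMemory(IPLD)

  const file = await first(importer([{
    path: 'hello.txt',
    content: Buffer.from('hello world')
  }], ipld))

  // Each yielded entry is { node, cid } for a leaf block of the file's DAG
  const leaves = await all(collectLeafCids(file.cid, ipld))
  console.log(leaves.map(leaf => leaf.cid.toString()))
}

main().catch(console.error)
```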
diff --git a/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js b/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js
new file mode 100644
index 00000000..3b07c734
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/helpers/finite-pseudorandom-byte-stream.js
@@ -0,0 +1,24 @@
+'use strict'
+
+const REPEATABLE_CHUNK_SIZE = 300000
+
+module.exports = function * (maxSize, seed) {
+ const chunks = Math.ceil(maxSize / REPEATABLE_CHUNK_SIZE)
+ let emitted = 0
+ const buf = Buffer.alloc(REPEATABLE_CHUNK_SIZE)
+
+ while (emitted !== chunks) {
+    // Note: 256 & n is always 0 for any n < 256 (possibly intended as 255 &),
+    // so this deterministically fills the buffer with zeroes. The expected
+    // go-ipfs parity CIDs in hash-parity-with-go-ipfs.spec.js depend on this
+    // exact byte stream, so the expression is left as-is.
+    for (let i = 0; i < buf.length; i++) {
+      buf[i] = 256 & Math.floor(random(seed) * 256)
+ }
+
+ yield buf
+
+ emitted++
+ }
+}
+
+function random (seed) {
+ const x = Math.sin(seed) * 10000
+ return x - Math.floor(x)
+}
diff --git a/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js b/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js
new file mode 100644
index 00000000..776ae90f
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/helpers/random-byte-stream.js
@@ -0,0 +1,15 @@
+'use strict'
+
+module.exports = function * randomByteStream (seed) {
+ while (true) {
+ const r = Math.floor(random(seed) * 256)
+ seed = r
+
+ yield Buffer.from([r])
+ }
+}
+
+function random (seed) {
+ const x = Math.sin(seed) * 10000
+ return x - Math.floor(x)
+}
diff --git a/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js b/packages/ipfs-unixfs-importer/test/helpers/stream-to-array.js
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js b/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js
new file mode 100644
index 00000000..ae607121
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/import-export-nested-dir.spec.js
@@ -0,0 +1,115 @@
+/* eslint-env mocha */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const all = require('it-all')
+const importer = require('../src')
+const exporter = require('ipfs-unixfs-exporter')
+
+describe('import and export: directory', () => {
+ const rootHash = 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK'
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ it('imports', async function () {
+ this.timeout(20 * 1000)
+
+ const source = [{
+ path: 'a/b/c/d/e',
+ content: Buffer.from('banana')
+ }, {
+ path: 'a/b/c/d/f',
+ content: Buffer.from('strawberry')
+ }, {
+ path: 'a/b/g',
+ content: Buffer.from('ice')
+ }, {
+ path: 'a/b/h',
+ content: Buffer.from('cream')
+ }]
+
+ const files = await all(importer(source, ipld))
+
+ expect(files.map(normalizeNode).sort(byPath)).to.be.eql([{
+ path: 'a/b/h',
+ multihash: 'QmWHMpCtdNjemT2F3SjyrmnBXQXwEohaZd4apcbFBhbFRC'
+ }, {
+ path: 'a/b/g',
+ multihash: 'QmQGwYzzTPcbqTiy2Nbp88gqqBqCWY4QZGfen45LFZkD5n'
+ }, {
+ path: 'a/b/c/d/f',
+ multihash: 'QmNVHs2dy7AjGUotsubWVncRsD3SpRXm8MgmCCQTVdVACz'
+ }, {
+ path: 'a/b/c/d/e',
+ multihash: 'QmYPbDKwc7oneCcEc6BcRSN5GXthTGWUCd19bTCyP9u3vH'
+ }, {
+ path: 'a/b/c/d',
+ multihash: 'QmQGDXr3ysARM38n7h79Tx7yD3YxuzcnZ1naG71WMojPoj'
+ }, {
+ path: 'a/b/c',
+ multihash: 'QmYTVcjYpN3hQLtJstCPE8hhEacAYjWAuTmmAAXoonamuE'
+ }, {
+ path: 'a/b',
+ multihash: 'QmWyWYxq1GD9fEyckf5LrJv8hMW35CwfWwzDBp8bTw3NQj'
+ }, {
+ path: 'a',
+ multihash: rootHash
+ }])
+ })
+
+ it('exports', async function () {
+ this.timeout(20 * 1000)
+
+ const dir = await exporter(rootHash, ipld)
+ const files = await recursiveExport(dir, rootHash)
+
+ expect(files.sort(byPath)).to.eql([{
+ path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/h',
+ content: 'cream'
+ }, {
+ path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/g',
+ content: 'ice'
+ }, {
+ path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/c/d/f',
+ content: 'strawberry'
+ }, {
+ path: 'QmdCrquDwd7RfZ6GCZFEVADwe8uyyw1YmF9mtAB7etDgmK/b/c/d/e',
+ content: 'banana'
+ }])
+ })
+})
+
+async function recursiveExport (node, path, entries = []) {
+ for await (const entry of node.content()) {
+ if (entry.unixfs.type === 'directory') {
+ await recursiveExport(entry, `${path}/${entry.name}`, entries)
+ } else {
+ entries.push({
+ path: `${path}/${entry.name}`,
+ content: Buffer.concat(await all(entry.content())).toString()
+ })
+ }
+ }
+
+ return entries
+}
+
+function normalizeNode (node) {
+ return {
+ path: node.path,
+ multihash: node.cid.toBaseEncodedString()
+ }
+}
+
+function byPath (a, b) {
+ if (a.path > b.path) return -1
+ if (a.path < b.path) return 1
+ return 0
+}
diff --git a/packages/ipfs-unixfs-importer/test/import-export.spec.js b/packages/ipfs-unixfs-importer/test/import-export.spec.js
new file mode 100644
index 00000000..16a91eae
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/import-export.spec.js
@@ -0,0 +1,50 @@
+/* eslint-env mocha */
+/* eslint max-nested-callbacks: ["error", 5] */
+'use strict'
+
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const loadFixture = require('aegir/fixtures')
+const isNode = require('detect-node')
+const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt')
+
+const importer = require('../src')
+const exporter = require('ipfs-unixfs-exporter')
+
+const strategies = [
+ 'flat',
+ 'balanced',
+ 'trickle'
+]
+
+describe('import and export', function () {
+ this.timeout(30 * 1000)
+
+ strategies.forEach((strategy) => {
+ const importerOptions = { strategy: strategy }
+
+ describe('using builder: ' + strategy, () => {
+ let ipld
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ it('imports and exports', async () => {
+ const path = `${strategy}-big.dat`
+ const values = [{ path: path, content: bigFile }]
+
+ for await (const file of importer(values, ipld, importerOptions)) {
+ expect(file.path).to.eql(path)
+
+ const result = await exporter(file.cid, ipld)
+
+ expect(result.unixfs.fileSize()).to.eql(bigFile.length)
+ }
+ })
+ })
+ })
+})
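Outside of mocha, the same import/export round trip can be sketched as below. This mirrors what the spec above asserts, but also reads the exported bytes back instead of only checking the file size; all names and paths are illustrative, not part of the package:

```js
'use strict'

// Hypothetical round-trip sketch: import a buffer, then read it back through
// ipfs-unixfs-exporter. Mirrors the spec above; names and paths are illustrative.
const IPLD = require('ipld')
const inMemory = require('ipld-in-memory')
const all = require('it-all')
const first = require('it-first')
const importer = require('ipfs-unixfs-importer')
const exporter = require('ipfs-unixfs-exporter')

async function roundTrip (buf) {
  const ipld = await inMemory(IPLD)

  const file = await first(importer([{ path: 'file.bin', content: buf }], ipld, {
    strategy: 'balanced' // any of the strategies exercised by the spec
  }))

  const entry = await exporter(file.cid, ipld)

  // entry.content() is an async iterable of chunks; concatenate to compare
  return Buffer.concat(await all(entry.content()))
}

roundTrip(Buffer.from('hello world'))
  .then(out => console.log(out.toString())) // -> 'hello world'
  .catch(console.error)
```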
diff --git a/packages/ipfs-unixfs-importer/test/importer.spec.js b/packages/ipfs-unixfs-importer/test/importer.spec.js
new file mode 100644
index 00000000..0f5b6589
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/importer.spec.js
@@ -0,0 +1,987 @@
+/* eslint-env mocha */
+'use strict'
+
+const importer = require('../src')
+const exporter = require('ipfs-unixfs-exporter')
+
+const extend = require('deep-extend')
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const spy = require('sinon/lib/sinon/spy')
+const IPLD = require('ipld')
+const inMemory = require('ipld-in-memory')
+const UnixFs = require('ipfs-unixfs')
+const collectLeafCids = require('./helpers/collect-leaf-cids')
+const loadFixture = require('aegir/fixtures')
+const isNode = require('detect-node')
+const bigFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/1.2MiB.txt')
+const smallFile = loadFixture((isNode ? __dirname : 'test') + '/fixtures/200Bytes.txt')
+const all = require('it-all')
+const first = require('it-first')
+
+function stringifyMh (files) {
+ return files.map((file) => {
+ return {
+ ...file,
+ cid: file.cid.toBaseEncodedString()
+ }
+ })
+}
+
+function dateToTimespec (date) {
+ const ms = date.getTime()
+ const secs = Math.floor(ms / 1000)
+
+ return {
+ secs,
+ nsecs: (ms - (secs * 1000)) * 1000
+ }
+}
+
+const baseFiles = {
+ '200Bytes.txt': {
+ cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8',
+ size: 200,
+ type: 'file',
+ path: '200Bytes.txt'
+ },
+ '1.2MiB.txt': {
+ cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q',
+ size: 1258000,
+ type: 'file',
+ path: '1.2MiB.txt'
+ },
+ 'small.txt': {
+ cid: 'QmZMb7HWpbevpcdhbUV1ZZgdji8vh5uQ13KxczChGrK9Rd',
+ size: 15,
+ type: 'file',
+ path: 'small.txt'
+ }
+}
+
+const strategyBaseFiles = {
+ flat: baseFiles,
+ balanced: extend({}, baseFiles, {
+ '1.2MiB.txt': {
+ cid: 'QmW7BDxEbGqxxSYVtn3peNPQgdDXbWkoQ6J1EFYAEuQV3Q',
+ type: 'file'
+ }
+ }),
+ trickle: extend({}, baseFiles, {
+ '200Bytes.txt': {
+ cid: 'QmY8bwnoKAKvJ8qtyPhWNxSS6sxiGVTJ9VpdQffs2KB5pE',
+ size: 200,
+ type: 'file',
+ path: '200Bytes.txt'
+ },
+ '1.2MiB.txt': {
+ cid: 'QmfAxsHrpaLLuhbqqbo9KQyvQNawMnVSwutYoJed75pnco',
+ type: 'file'
+ }
+ })
+}
+
+const strategies = [
+ 'flat',
+ 'balanced',
+ 'trickle'
+]
+
+const strategyOverrides = {
+ balanced: {
+ 'foo-big': {
+ cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj',
+ path: 'foo-big',
+ size: 1335478,
+ type: 'directory'
+ },
+ pim: {
+ cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
+ path: 'pim',
+ size: 1335744,
+ type: 'directory'
+ },
+ 'pam/pum': {
+ cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
+ path: 'pam/pum',
+ size: 1335744,
+ type: 'directory'
+ },
+ pam: {
+ cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av',
+ path: 'pam',
+ size: 2671269,
+ type: 'directory'
+ }
+ },
+ trickle: {
+ 'foo-big': {
+ cid: 'QmaKbhFRy9kcCbcwrLsqYHWMiY44BDYkqTCMpAxDdd2du2',
+ path: 'foo-big',
+ size: 1334657,
+ type: 'directory'
+ },
+ pim: {
+ cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt',
+ path: 'pim',
+ size: 1334923,
+ type: 'directory'
+ },
+ 'pam/pum': {
+ cid: 'QmbWGdnua4YuYpWJb7fE25PRbW9GbKKLqq9Ucmnsg2gxnt',
+ path: 'pam/pum',
+ size: 1334923,
+ type: 'directory'
+ },
+ pam: {
+ cid: 'QmSuh47G9Qm3PFv1zziojtHxqCjuurSdtWAzxLxoKJPq2U',
+ path: 'pam',
+ size: 2669627,
+ type: 'directory'
+ },
+ '200Bytes.txt with raw leaves': {
+ cid: 'QmagyRwMfYhczYNv5SvcJc8xxXjZQBTTHS2jEqNMva2mYT',
+ size: 200,
+ path: '200Bytes.txt',
+ type: 'file'
+ },
+ 'foo/bar': {
+ cid: 'QmTGMxKPzSGNBDp6jhTwnZxGW6w1S9ciyycRJ4b2qcQaHK',
+ size: 0,
+ path: 'foo/bar',
+ type: 'directory'
+ },
+ foo: {
+ cid: 'Qme4A8fZmwfZESappfPcxSMTZVACiEzhHKtYRMuM1hbkDp',
+ size: 0,
+ path: 'foo',
+ type: 'directory'
+ },
+ 'small.txt': {
+ cid: 'QmXmZ3qT328JxWtQXqrmvma2FmPp7tMdNiSuYvVJ5QRhKs',
+ size: 15,
+ type: 'file',
+ path: 'small.txt'
+ }
+ }
+}
+
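+// Imports a file slightly larger than the default 262144-byte chunk size (so it
+// should be split into two leaves) and asserts that every leaf node was created
+// with the expected UnixFS type.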
+const checkLeafNodeTypes = async (ipld, options, expected) => {
+ const file = await first(importer([{
+ path: 'foo',
+ content: Buffer.alloc(262144 + 5).fill(1)
+ }], ipld, options))
+
+ const node = await ipld.get(file.cid)
+ const meta = UnixFs.unmarshal(node.Data)
+
+ expect(meta.type).to.equal('file')
+ expect(node.Links.length).to.equal(2)
+
+ const linkedNodes = await Promise.all(
+ node.Links.map(link => ipld.get(link.Hash))
+ )
+
+ linkedNodes.forEach(node => {
+ const meta = UnixFs.unmarshal(node.Data)
+ expect(meta.type).to.equal(expected)
+ })
+}
+
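+// Imports a 100-byte file (well under one chunk) and asserts how many links the
+// resulting root node has: 0 when the single leaf is folded into the root, 1 when
+// reduceSingleLeafToSelf is disabled.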
+const checkNodeLinks = async (ipld, options, expected) => {
+ for await (const file of importer([{
+ path: 'foo',
+ content: Buffer.alloc(100).fill(1)
+ }], ipld, options)) {
+ const node = await ipld.get(file.cid)
+ const meta = UnixFs.unmarshal(node.Data)
+
+ expect(meta.type).to.equal('file')
+ expect(node.Links.length).to.equal(expected)
+ }
+}
+
+strategies.forEach((strategy) => {
+ const baseFiles = strategyBaseFiles[strategy]
+ const defaultResults = extend({}, baseFiles, {
+ 'foo/bar/200Bytes.txt': extend({}, baseFiles['200Bytes.txt'], {
+ path: 'foo/bar/200Bytes.txt'
+ }),
+ foo: {
+ path: 'foo',
+ cid: 'QmQrb6KKWGo8w7zKfx2JksptY6wN7B2ysSBdKZr4xMU36d',
+ size: 320,
+ type: 'directory'
+ },
+ 'foo/bar': {
+ path: 'foo/bar',
+ cid: 'Qmf5BQbTUyUAvd6Ewct83GYGnE1F6btiC3acLhR8MDxgkD',
+ size: 270,
+ type: 'directory'
+ },
+ 'foo-big/1.2MiB.txt': extend({}, baseFiles['1.2MiB.txt'], {
+ path: 'foo-big/1.2MiB.txt'
+ }),
+ 'foo-big': {
+ path: 'foo-big',
+ cid: 'QmaFgyFJUP4fxFySJCddg2Pj6rpwSywopWk87VEVv52RSj',
+ size: 1328120,
+ type: 'directory'
+ },
+ 'pim/200Bytes.txt': extend({}, baseFiles['200Bytes.txt'], {
+ path: 'pim/200Bytes.txt'
+ }),
+ 'pim/1.2MiB.txt': extend({}, baseFiles['1.2MiB.txt'], {
+ path: 'pim/1.2MiB.txt'
+ }),
+ pim: {
+ path: 'pim',
+ cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
+ size: 1328386,
+ type: 'directory'
+ },
+ 'empty-dir': {
+ path: 'empty-dir',
+ cid: 'QmUNLLsPACCz1vLxQVkXqqLX5R1X345qqfHbsf67hvA3Nn',
+ size: 4,
+ type: 'directory'
+ },
+ 'pam/pum': {
+ cid: 'QmY8a78tx6Tk6naDgWCgTsd9EqGrUJRrH7dDyQhjyrmH2i',
+ path: 'pam/pum',
+ size: 1328386,
+ type: 'directory'
+ },
+ pam: {
+ cid: 'QmRgdtzNx1H1BPJqShdhvWZ2D4DA2HUgZJ3XLtoXei27Av',
+ path: 'pam',
+ size: 2656553,
+ type: 'directory'
+ },
+ '200Bytes.txt with raw leaves': extend({}, baseFiles['200Bytes.txt'], {
+ cid: 'QmQmZQxSKQppbsWfVzBvg59Cn3DKtsNVQ94bjAxg2h3Lb8',
+ size: 200
+ })
+ }, strategyOverrides[strategy])
+
+  const expected = extend({}, defaultResults, strategyOverrides[strategy])
+
+ const expectFiles = (actualFiles, expectedFiles) => {
+ expect(actualFiles.length).to.equal(expectedFiles.length)
+
+ for (let i = 0; i < expectedFiles.length; i++) {
+ const expectedFile = expected[expectedFiles[i]]
+ const actualFile = actualFiles[i]
+
+ expect(actualFile.path).to.equal(expectedFile.path)
+ expect(actualFile.cid.toBaseEncodedString('base58btc')).to.equal(expectedFile.cid)
+
+ if (actualFile.unixfs) {
+ expect(actualFile.unixfs.type).to.equal(expectedFile.type)
+
+ if (actualFile.unixfs.type === 'file') {
+ expect(actualFile.unixfs.fileSize()).to.equal(expectedFile.size)
+ }
+ }
+ }
+ }
+
+ describe('importer: ' + strategy, function () {
+ this.timeout(30 * 1000)
+
+ let ipld
+ const options = {
+ strategy: strategy
+ }
+
+ before(async () => {
+ ipld = await inMemory(IPLD)
+ })
+
+ it('fails on bad content', async () => {
+ try {
+ await all(importer([{
+ path: '200Bytes.txt',
+ content: 7
+ }], ipld, options))
+ throw new Error('No error was thrown')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_INVALID_CONTENT')
+ }
+ })
+
+ it('fails on an iterator that yields bad content', async () => {
+ try {
+ await all(importer([{
+ path: '200Bytes.txt',
+ content: {
+ [Symbol.iterator]: function * () {
+ yield 7
+ }
+ }
+ }], ipld, options))
+ throw new Error('No error was thrown')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_INVALID_CONTENT')
+ }
+ })
+
+ it('doesn\'t yield anything on empty source', async () => {
+ const files = await all(importer([], ipld, options))
+
+ expect(files).to.be.empty()
+ })
+
+    it('yields a single empty file node for an empty file', async () => {
+ const files = await all(importer([{
+ path: 'emptyfile',
+ content: Buffer.alloc(0)
+ }], ipld, options))
+
+ expect(files.length).to.eql(1)
+
+ // always yield empty file node
+ expect(files[0].cid.toBaseEncodedString()).to.eql('QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH')
+ })
+
+ it('fails on more than one root', async () => {
+ try {
+ await all(importer([{
+ path: 'beep/200Bytes.txt',
+ content: smallFile
+ }, {
+ path: 'boop/200Bytes.txt',
+ content: bigFile
+ }], ipld, options))
+
+ throw new Error('No error was thrown')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_MORE_THAN_ONE_ROOT')
+ }
+ })
+
+ it('accepts strings as content', async () => {
+ const content = 'I am a string'
+ const res = await all(importer([{
+ path: '200Bytes.txt',
+ content
+ }], ipld, options))
+
+ const file = await exporter(res[0].cid, ipld)
+ const fileContent = await all(file.content())
+
+      expect(Buffer.concat(fileContent).toString()).to.equal(content)
+ })
+
+ it('small file with an escaped slash in the title', async () => {
+ const filePath = `small-\\/file-${Math.random()}.txt`
+ const files = await all(importer([{
+ path: filePath,
+ content: smallFile
+ }], ipld, options))
+
+ expect(files.length).to.equal(1)
+ expect(files[0].path).to.equal(filePath)
+ })
+
+ it('small file with square brackets in the title', async () => {
+ const filePath = `small-[v]-file-${Math.random()}.txt`
+ const files = await all(importer([{
+ path: filePath,
+ content: smallFile
+ }], ipld, options))
+
+ expect(files.length).to.equal(1)
+ expect(files[0].path).to.equal(filePath)
+ })
+
+ it('small file as buffer (smaller than a chunk)', async () => {
+ const files = await all(importer([{
+ path: '200Bytes.txt',
+ content: smallFile
+ }], ipld, options))
+
+ expectFiles(files, [
+ '200Bytes.txt'
+ ])
+ })
+
+ it('small file as array (smaller than a chunk)', async () => {
+ const files = await all(importer([{
+ path: '200Bytes.txt',
+ content: Array.from(smallFile)
+ }], ipld, options))
+
+ expectFiles(files, [
+ '200Bytes.txt'
+ ])
+ })
+
+ it('small file as string (smaller than a chunk)', async () => {
+ const files = await all(importer([{
+ path: 'small.txt',
+ content: 'this is a file\n'
+ }], ipld, options))
+
+ expectFiles(files, [
+ 'small.txt'
+ ])
+ })
+
+ it('small file (smaller than a chunk) with raw leaves', async () => {
+ const files = await all(importer([{
+ path: '200Bytes.txt',
+ content: smallFile
+ }], ipld, {
+ ...options,
+ rawLeaves: true
+ }))
+
+ expectFiles(files, [
+ '200Bytes.txt with raw leaves'
+ ])
+ })
+
+ it('small file (smaller than a chunk) inside a dir', async () => {
+ const files = await all(importer([{
+ path: 'foo/bar/200Bytes.txt',
+ content: smallFile
+ }], ipld, options))
+
+ expectFiles(files, [
+ 'foo/bar/200Bytes.txt',
+ 'foo/bar',
+ 'foo'
+ ])
+ })
+
+ it('file bigger than a single chunk', async () => {
+ this.timeout(60 * 1000)
+
+ const files = await all(importer([{
+ path: '1.2MiB.txt',
+ content: bigFile
+ }], ipld, options))
+
+ expectFiles(files, [
+ '1.2MiB.txt'
+ ])
+ })
+
+ it('file bigger than a single chunk inside a dir', async () => {
+ this.timeout(60 * 1000)
+
+ const files = await all(importer([{
+ path: 'foo-big/1.2MiB.txt',
+ content: bigFile
+ }], ipld, options))
+
+ expectFiles(files, [
+ 'foo-big/1.2MiB.txt',
+ 'foo-big'
+ ])
+ })
+
+ it('empty directory', async () => {
+ const files = await all(importer([{
+ path: 'empty-dir'
+ }], ipld, options))
+
+ expectFiles(files, [
+ 'empty-dir'
+ ])
+ })
+
+ it('directory with files', async () => {
+ const files = await all(importer([{
+ path: 'pim/200Bytes.txt',
+ content: smallFile
+ }, {
+ path: 'pim/1.2MiB.txt',
+ content: bigFile
+ }], ipld, options))
+
+ expectFiles(files, [
+ 'pim/200Bytes.txt',
+ 'pim/1.2MiB.txt',
+ 'pim'
+ ])
+ })
+
+ it('nested directory (2 levels deep)', async () => {
+ const files = await all(importer([{
+ path: 'pam/pum/200Bytes.txt',
+ content: smallFile
+ }, {
+ path: 'pam/pum/1.2MiB.txt',
+ content: bigFile
+ }, {
+ path: 'pam/1.2MiB.txt',
+ content: bigFile
+ }], ipld, options))
+
+ const result = stringifyMh(files)
+
+ expect(result.length).to.equal(5)
+
+ result.forEach(eachFile)
+
+ function eachFile (file) {
+ if (file.path === 'pam/pum/200Bytes.txt') {
+ expect(file.cid).to.equal(expected['200Bytes.txt'].cid)
+ expect(file.unixfs.fileSize()).to.equal(expected['200Bytes.txt'].size)
+ } else if (file.path === 'pam/pum/1.2MiB.txt') {
+ expect(file.cid).to.equal(expected['1.2MiB.txt'].cid)
+ expect(file.unixfs.fileSize()).to.equal(expected['1.2MiB.txt'].size)
+ } else if (file.path === 'pam/pum') {
+ expect(file.cid).to.equal(expected['pam/pum'].cid)
+ } else if (file.path === 'pam/1.2MiB.txt') {
+ expect(file.cid).to.equal(expected['1.2MiB.txt'].cid)
+ expect(file.unixfs.fileSize()).to.equal(expected['1.2MiB.txt'].size)
+ } else if (file.path === 'pam') {
+ expect(file.cid).to.equal(expected.pam.cid)
+ } else {
+ throw new Error(`Unexpected path ${file.path}`)
+ }
+ }
+ })
+
+ it('will not write to disk if passed "onlyHash" option', async () => {
+ const content = String(Math.random() + Date.now())
+ const files = await all(importer([{
+ path: content + '.txt',
+ content: Buffer.from(content)
+ }], ipld, {
+ onlyHash: true
+ }))
+
+ const file = files[0]
+ expect(file).to.exist()
+
+ try {
+ await ipld.get(file.cid)
+
+ throw new Error('No error was thrown')
+ } catch (err) {
+ expect(err.code).to.equal('ERR_NOT_FOUND')
+ }
+ })
+
+ it('will call an optional progress function', async () => {
+ const maxChunkSize = 2048
+
+ const options = {
+ progress: spy(),
+ maxChunkSize
+ }
+
+ await all(importer([{
+ path: '1.2MiB.txt',
+ content: bigFile
+ }], ipld, options))
+
+ expect(options.progress.called).to.equal(true)
+ expect(options.progress.args[0][0]).to.equal(maxChunkSize)
+ })
+
+ it('will import files with CID version 1', async () => {
+ const createInputFile = (path, size) => {
+ const name = String(Math.random() + Date.now())
+ path = path[path.length - 1] === '/' ? path : path + '/'
+ return {
+ path: path + name + '.txt',
+ content: Buffer.alloc(size).fill(1)
+ }
+ }
+
+ const inputFiles = [
+ createInputFile('/foo', 10),
+ createInputFile('/foo', 60),
+ createInputFile('/foo/bar', 78),
+ createInputFile('/foo/baz', 200),
+ // Bigger than maxChunkSize
+ createInputFile('/foo', 262144 + 45),
+ createInputFile('/foo/bar', 262144 + 134),
+ createInputFile('/foo/bar', 262144 + 79),
+ createInputFile('/foo/bar', 262144 + 876),
+ createInputFile('/foo/bar', 262144 + 21)
+ ]
+
+ const options = {
+ cidVersion: 1,
+ // Ensures we use DirSharded for the data below
+ shardSplitThreshold: 3
+ }
+
+ // Pass a copy of inputFiles, since the importer mutates them
+ const files = await all(importer(inputFiles.map(f => Object.assign({}, f)), ipld, options))
+
+ const file = files[0]
+ expect(file).to.exist()
+
+      for (let i = 0; i < files.length; i++) {
+ const file = files[i]
+
+ const cid = file.cid.toV1()
+ const inputFile = inputFiles.find(f => f.path === file.path)
+
+ // Just check the intermediate directory can be retrieved
+        if (!inputFile) {
+          await ipld.get(cid)
+
+          continue
+        }
+
+ // Check the imported content is correct
+ const node = await exporter(cid, ipld)
+ const chunks = []
+
+ for await (const chunk of node.content()) {
+ chunks.push(chunk)
+ }
+
+ expect(Buffer.concat(chunks)).to.deep.equal(inputFile.content)
+ }
+ })
+
+ it('imports file with raw leaf nodes when specified', () => {
+ return checkLeafNodeTypes(ipld, {
+ leafType: 'raw'
+ }, 'raw')
+ })
+
+ it('imports file with file leaf nodes when specified', () => {
+ return checkLeafNodeTypes(ipld, {
+ leafType: 'file'
+ }, 'file')
+ })
+
+ it('reduces file to single node when specified', () => {
+ return checkNodeLinks(ipld, {
+ reduceSingleLeafToSelf: true
+ }, 0)
+ })
+
+    it('does not reduce file to single node when overridden by options', () => {
+ return checkNodeLinks(ipld, {
+ reduceSingleLeafToSelf: false
+ }, 1)
+ })
+
+ it('uses raw leaf nodes when requested', async () => {
+ this.timeout(60 * 1000)
+
+ const options = {
+ rawLeaves: true
+ }
+
+ for await (const file of importer([{
+ path: '1.2MiB.txt',
+ content: bigFile
+ }], ipld, options)) {
+ for await (const { cid } of collectLeafCids(file.cid, ipld)) {
+        expect(cid.codec).to.equal('raw')
+        expect(cid.version).to.equal(1)
+ }
+ }
+ })
+
+ it('supports passing mtime', async () => {
+ this.timeout(60 * 1000)
+
+ const options = {
+ rawLeaves: true
+ }
+ const now = new Date()
+
+ for await (const file of importer([{
+ path: '1.2MiB.txt',
+ content: bigFile,
+ mtime: now
+ }], ipld, options)) {
+ const node = await exporter(file.cid, ipld)
+
+ expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now))
+ }
+ })
+
+ it('supports passing mtime for directories', async () => {
+ this.timeout(60 * 1000)
+
+ const now = new Date()
+
+ const entries = await all(importer([{
+ path: '/foo',
+ mtime: now
+ }], ipld))
+
+ const node = await exporter(entries[0].cid, ipld)
+ expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now))
+ })
+
+ it('supports passing metadata for wrapping directories', async () => {
+ this.timeout(60 * 1000)
+
+ const now = new Date()
+ const perms = 0o0777
+
+ const entries = await all(importer([{
+ path: '/foo',
+ mtime: now,
+ mode: perms
+ }, {
+ path: '/foo/bar.txt',
+ content: bigFile
+ }], ipld))
+
+ const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+ const node = nodes.filter(node => node.unixfs.type === 'directory').pop()
+
+ if (!node) {
+ expect.fail('no directory found')
+ }
+
+ expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now))
+ expect(node).to.have.nested.property('unixfs.mode', perms)
+ })
+
+ it('supports passing metadata for intermediate directories', async () => {
+ this.timeout(60 * 1000)
+
+ const now = new Date()
+ const perms = 0o0777
+
+ const entries = await all(importer([{
+ path: '/foo/bar',
+ mtime: now,
+ mode: perms
+ }, {
+ path: '/foo/bar/baz.txt',
+ content: bigFile
+ }], ipld))
+
+ const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+ const node = nodes.filter(node => node.unixfs.type === 'directory').pop()
+
+ if (!node) {
+ expect.fail('no directory found')
+ }
+
+ expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now))
+ expect(node).to.have.nested.property('unixfs.mode', perms)
+ })
+
+ it('supports passing metadata for out of order intermediate directories', async () => {
+ this.timeout(60 * 1000)
+
+ const now = new Date()
+ const perms = 0o0777
+
+ const entries = await all(importer([{
+ path: '/foo/bar/qux.txt',
+ content: bigFile
+ }, {
+ path: '/foo/bar',
+ mtime: now,
+ mode: perms
+ }, {
+ path: '/foo/quux'
+ }, {
+ path: '/foo/bar/baz.txt',
+ content: bigFile
+ }], ipld))
+
+ const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+ const node = nodes.filter(node => node.unixfs.type === 'directory' && node.name === 'bar').pop()
+
+ if (!node) {
+ expect.fail('no directory found')
+ }
+
+ expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now))
+ expect(node).to.have.nested.property('unixfs.mode', perms)
+ })
+
+ it('supports passing mtime for hamt-sharded-directories', async () => {
+ this.timeout(60 * 1000)
+
+ const now = new Date()
+
+ const entries = await all(importer([{
+ path: '/foo',
+ mtime: now
+ }, {
+ path: '/foo/bar.txt',
+ content: bigFile
+ }, {
+ path: '/foo/baz.txt',
+ content: bigFile
+ }, {
+ path: '/foo/qux'
+ }], ipld, {
+ shardSplitThreshold: 0
+ }))
+
+ const nodes = await all(exporter.recursive(entries[entries.length - 1].cid, ipld))
+ const node = nodes.filter(node => node.unixfs.type === 'hamt-sharded-directory').pop()
+
+ if (!node) {
+ expect.fail('no hamt-sharded-directory found')
+ }
+
+ expect(node).to.have.nested.deep.property('unixfs.mtime', dateToTimespec(now))
+ })
+
+ it('supports passing mode', async () => {
+ this.timeout(60 * 1000)
+
+ const options = {
+ rawLeaves: true
+ }
+ const mode = 0o0111
+
+ for await (const file of importer([{
+ path: '1.2MiB.txt',
+ content: bigFile,
+ mode
+ }], ipld, options)) {
+ const node = await exporter(file.cid, ipld)
+
+ expect(node).to.have.nested.property('unixfs.mode', mode)
+ }
+ })
+
+ it('supports passing mode for directories', async () => {
+ this.timeout(60 * 1000)
+
+ const mode = 0o0111
+
+ const entries = await all(importer([{
+ path: '/foo',
+ mode
+ }], ipld))
+
+ const node = await exporter(entries[0].cid, ipld)
+ expect(node).to.have.nested.property('unixfs.mode', mode)
+ })
+
+ it('supports passing different modes for different files', async () => {
+ this.timeout(60 * 1000)
+
+ const mode1 = 0o0111
+ const mode2 = 0o0222
+
+ const entries = await all(importer([{
+ path: '/foo/file1.txt',
+ content: bigFile,
+ mode: mode1
+ }, {
+ path: '/foo/file2.txt',
+ content: bigFile,
+ mode: mode2
+ }], ipld))
+
+ const node1 = await exporter(entries[0].cid, ipld)
+ expect(node1).to.have.nested.property('unixfs.mode', mode1)
+
+ const node2 = await exporter(entries[1].cid, ipld)
+ expect(node2).to.have.nested.property('unixfs.mode', mode2)
+ })
+
+    it('deeply nested files do not inherit custom metadata', async () => {
+ this.timeout(60 * 1000)
+
+ const mode = 0o0111
+
+ const entries = await all(importer([{
+ path: '/foo/file1.txt',
+ content: bigFile,
+ mode: mode
+ }, {
+ path: '/foo/bar/baz/file2.txt',
+ content: bigFile
+ }], ipld))
+
+ const node1 = await exporter(entries[0].cid, ipld)
+ expect(node1).to.have.nested.property('unixfs.mode', mode)
+
+ const node2 = await exporter(entries[1].cid, ipld)
+ expect(node2).to.have.nested.property('unixfs.mode').that.does.not.equal(mode)
+ })
+
+ it('files and directories get default mode if not specified', async () => {
+ this.timeout(60 * 1000)
+
+ const entries = await all(importer([{
+ path: '/foo/file1.txt',
+ content: bigFile
+ }], ipld))
+
+ const node1 = await exporter(entries[0].cid, ipld)
+ expect(node1).to.have.nested.property('unixfs.mode', 0o0644)
+
+ const node2 = await exporter(entries[1].cid, ipld)
+ expect(node2).to.have.nested.property('unixfs.mode', 0o0755)
+ })
+ })
+})
+
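+// These tests replace the internal pipeline stages (dagBuilder/treeBuilder and
+// chunkValidator/chunker) with stubs, so only a minimal stand-in for ipld is needed.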
+describe('configuration', () => {
+  it('allows configuring with custom dag and tree builder', async () => {
+ let builtTree = false
+ const ipld = 'ipld'
+ const entries = await all(importer([{
+ path: 'path',
+ content: 'content'
+ }], ipld, {
+ dagBuilder: async function * (source, ipld, opts) { // eslint-disable-line require-await
+ yield function () {
+ return Promise.resolve({
+ cid: 'cid',
+ path: 'path',
+ unixfs: 'unixfs'
+ })
+ }
+ },
+ treeBuilder: async function * (source, ipld, opts) { // eslint-disable-line require-await
+ builtTree = true
+ yield * source
+ }
+ }))
+
+ expect(entries).to.have.lengthOf(1)
+ expect(entries).to.have.nested.property('[0].cid', 'cid')
+ expect(entries).to.have.nested.property('[0].path', 'path')
+ expect(entries).to.have.nested.property('[0].unixfs', 'unixfs')
+
+ expect(builtTree).to.be.true()
+ })
+
+  it('allows configuring with custom chunker', async () => {
+ let validated = false
+ let chunked = false
+ const ipld = {
+ put: () => 'cid'
+ }
+ const entries = await all(importer([{
+ path: 'path',
+ content: 'content'
+ }], ipld, {
+ chunkValidator: async function * (source, opts) { // eslint-disable-line require-await
+ validated = true
+ yield * source
+ },
+ chunker: async function * (source, opts) { // eslint-disable-line require-await
+ chunked = true
+ yield * source
+ }
+ }))
+
+ expect(entries).to.have.lengthOf(1)
+ expect(entries).to.have.nested.property('[0].cid', 'cid')
+ expect(entries).to.have.nested.property('[0].path', 'path')
+ expect(entries).to.have.nested.property('[0].unixfs')
+
+ expect(validated).to.be.true()
+ expect(chunked).to.be.true()
+ })
+})
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data
new file mode 100644
index 00000000..70641f44
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data
@@ -0,0 +1,5 @@
+
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data
new file mode 100644
index 00000000..41456196
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data
@@ -0,0 +1,4 @@
+
+ys# js-ipfs-repo
+Implementation of the IPFS repo spec (https://github.com/ipfs/specs/tree/master/repo) in JavaScript
+s
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data
new file mode 100644
index 00000000..ce734230
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data
@@ -0,0 +1,4 @@
+4
+" si"¹W`
+- browsers or extensions can learn to use `ipfs://` directly
+- hash-addressed content guarantees authenticity
+
+IPFS is modular:
+- connection layer over any network protocol
+- routing layer
+- uses a routing layer DHT (kademlia/coral)
+- uses a path-based naming service
+- uses bittorrent-inspired block exchange
+
+IPFS uses crypto:
+- cryptographic-hash content addressing
+- block-level deduplication
+- file integrity + versioning
+- filesystem-level encryption + signing support
+
+IPFS is p2p:
+- worldwide peer-to-peer file transfers
+- completely decentralized architecture
+- **no** central point of failure
+
+IPFS is a cdn:
+- add a file to the filesystem locally, and it's now available to the world
+- caching-friendly (content-hash naming)
+- bittorrent-based bandwidth distribution
+
+IPFS has a name service:
+- IPNS, an SFS inspired name system
+- global namespace based on PKI
+- serves to build trust chains
+- compatible with other NSes
+- can map DNS, .onion, .bit, etc to IPNS
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data
new file mode 100644
index 00000000..42c502e2
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data
new file mode 100644
index 00000000..46fecabf
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data
@@ -0,0 +1,2 @@
+
+Q
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data
new file mode 100644
index 00000000..1379fd9c
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data
new file mode 100644
index 00000000..ee87b15f
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data
@@ -0,0 +1,1452 @@
+
+l systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global distributed file
+system. Some systems have seen significant success, and others failed
+completely. Among the academic attempts, AFS [6] has succeeded widely and
+is still in use today. Others [7, ?] have not attained the same success.
+Outside of academia, the most successful systems have been peer-to-peer
+file-sharing applications primarily geared toward large media (audio and
+video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous users.
+Even today, BitTorrent maintains a massive deployment where tens of
+millions of nodes churn daily [16]. These applications saw greater numbers
+of users and files distributed than their academic file system
+counterparts. However, the applications were not designed as
+infrastructure to be built upon. While there have been successful
+repurposings¹, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+
+Perhaps this is because a “good enough” system for most use cases already
+exists: HTTP. By far, HTTP is the most successful “distributed system of
+files” ever deployed. Coupled with the browser, HTTP has had enormous
+technical and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage of dozens of
+brilliant file distribution techniques invented in the last fifteen years.
+From one perspective, evolving Web infrastructure is near-impossible,
+given the number of backwards compatibility constraints and the number of
+strong parties invested in the current model. But from another
+perspective, new protocols have emerged and gained wide use since the
+emergence of HTTP. What is lacking is upgrading design: enhancing the
+current HTTP web, and introducing new functionality without degrading
+user experience.
+
+Industry has gotten away with using HTTP this long because moving small
+files around is relatively cheap, even for small organizations with lots
+of traffic. But we are entering a new era of data distribution with new
+challenges: (a) hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning and linking of
+massive datasets, (e) preventing accidental disappearance of important
+files, and more. Many of these can be boiled down to “lots of data,
+accessible everywhere.” Pressed by critical features and bandwidth
+concerns, we have already given up HTTP for different data distribution
+protocols. The next step is making them part of the Web itself.
+
+Orthogonal to efficient data distribution, version control systems have
+managed to develop important data collaboration workflows. Git, the
+distributed source code version control system, developed many useful
+ways to model and implement distributed data operations. The Git
+toolchain offers versatile versioning functionality that large file
+distribution systems severely lack. New solutions inspired by Git are
+emerging, such as Camlistore [?], a personal file storage system, and
+Dat [?], a data collaboration toolchain and dataset package manager. Git
+has already influenced distributed filesystem design [9], as its
+content-addressed Merkle DAG data model enables powerful file
+distribution strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput-oriented file
+systems, and how it might upgrade the Web itself.
+
+This paper introduces IPFS, a novel peer-to-peer version-controlled
+filesystem seeking to reconcile these issues. IPFS synthesizes learnings
+from many past successful systems. Careful interface-focused integration
+yields a system greater than the sum of its parts. The central IPFS
+principle is modeling all data as part of the same Merkle DAG.
+
+¹ For example, Linux distributions use BitTorrent to transmit disk
+images, and Blizzard, Inc. uses it to distribute video game content.
+
+
+
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data
new file mode 100644
index 00000000..6860441a
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data
@@ -0,0 +1,3 @@
+/
+" gq6\u8~:6~gZ.directT2
+" 6(%݄.Ӿ5(ab recursiveT
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data
new file mode 100644
index 00000000..f57749f0
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data
new file mode 100644
index 00000000..6a0cbe82
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data
@@ -0,0 +1,3 @@
+
+
+Q
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data
new file mode 100644
index 00000000..74de75af
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data
new file mode 100644
index 00000000..f4c039c2
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data
new file mode 100644
index 00000000..8eb2a515
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data
new file mode 100644
index 00000000..a9c1c069
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data
new file mode 100644
index 00000000..1067edb4
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data
@@ -0,0 +1,6 @@
+
+
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data
new file mode 100644
index 00000000..4741988d
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data
@@ -0,0 +1,4729 @@
+
+e academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings¹
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one perspective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+parties invested in the current model. (¹ For example, Linux distributions
+use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to
+distribute video game content.) But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one perspective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong parties invested
+in the current model. (¹For example, Linux distributions use BitTorrent
+to transmit disk images, and Blizzard, Inc. uses it to distribute
+video game content.) But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?], a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content-addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data
new file mode 100644
index 00000000..df20559d
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data
new file mode 100644
index 00000000..ecce1053
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data
@@ -0,0 +1,4 @@
+5
+" F_uؔlzS?|ڲPc@js-ipfs-repo
+
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data
new file mode 100644
index 00000000..96566028
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data
@@ -0,0 +1,6 @@
+
+5
+" $G,A4{xZ/.D`200Bytes.txt3
+" Y9_)a˹2RmŖke9dir-another0
+" Ty5
;_9YfqFLhyl/level-1
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data
new file mode 100644
index 00000000..fa45ee79
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data
new file mode 100644
index 00000000..bbe6bda7
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data
new file mode 100644
index 00000000..b99ceb21
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data
@@ -0,0 +1,3 @@
+,
+" `u>/2l
ilfYB'M%bar
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data
new file mode 100644
index 00000000..be380799
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data
@@ -0,0 +1,4730 @@
+
+[7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings¹,
+no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one perspective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong parties invested
+in the current model. (¹For example, Linux distributions use BitTorrent
+to transmit disk images, and Blizzard, Inc. uses it to distribute
+video game content.) But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?], a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content-addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successfu
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data
new file mode 100644
index 00000000..508cff2e
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data
new file mode 100644
index 00000000..0b520379
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data
new file mode 100644
index 00000000..e705b9b0
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data
new file mode 100644
index 00000000..725a9b22
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data
@@ -0,0 +1,5 @@
+
+@:4
+" si"¹W\IzxEElM/fLICENSE1
+" JZXoRX!Fwd87U;SöWw README.md{
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data
new file mode 100644
index 00000000..5ea0edda
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data
new file mode 100644
index 00000000..e845c839
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data
new file mode 100644
index 00000000..4eb5d7bf
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data
@@ -0,0 +1,4 @@
+
+A;5
+" $G,A4{xZ/.D`200Bytes.txt
+;
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data
new file mode 100644
index 00000000..a762644a
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data
new file mode 100644
index 00000000..8e5a1d76
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data
new file mode 100644
index 00000000..5b090964
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data
new file mode 100644
index 00000000..f9810363
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data
@@ -0,0 +1,4728 @@
+
+There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.
+¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data
new file mode 100644
index 00000000..62d1c297
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data
@@ -0,0 +1,8 @@
+
+Come hang out in our IRC chat room if you have any questions.
+
+Contact the ipfs dev team:
+- Bugs: https://github.com/ipfs/go-ipfs/issues
+- Help: irc.freenode.org/#ipfs
+- Email: dev@ipfs.io
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data
new file mode 100644
index 00000000..00360cfb
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data
@@ -0,0 +1,3 @@
+4
+" UFrnb⇾?|< test-data
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data
new file mode 100644
index 00000000..026ac913
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data
new file mode 100644
index 00000000..7c40850f
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data
new file mode 100644
index 00000000..912b64e0
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data
new file mode 100644
index 00000000..9f1e7af6
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data
new file mode 100644
index 00000000..dcd69d0b
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data
new file mode 100644
index 00000000..71be805f
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data
@@ -0,0 +1,9 @@
+
+Some helpful resources for finding your way around ipfs:
+
+- quick-start: a quick show of various ipfs features.
+- ipfs commands: a list of all commands
+- ipfs --help: every command describes itself
+- https://github.com/ipfs/go-ipfs -- the src repository
+- #ipfs on irc.freenode.org -- the community irc channel
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data
new file mode 100644
index 00000000..aacafb9f
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data
new file mode 100644
index 00000000..ca141be2
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data
new file mode 100644
index 00000000..69e8f9e4
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data
new file mode 100644
index 00000000..637f391c
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data
@@ -0,0 +1,2 @@
+
+x\
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data
new file mode 100644
index 00000000..44403205
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data
@@ -0,0 +1,3 @@
+
+
+x\
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data
new file mode 100644
index 00000000..cbd601a6
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data
new file mode 100644
index 00000000..7b58d6c8
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data
@@ -0,0 +1,3 @@
+/
+" @ԆDgA7directT2
+" ;APY0k}E=p
recursiveT
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data
new file mode 100644
index 00000000..46d10573
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data
new file mode 100644
index 00000000..3f5311b7
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data
new file mode 100644
index 00000000..f0b3a599
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data
new file mode 100644
index 00000000..a3e60c9e
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data
new file mode 100644
index 00000000..bb713c56
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data
new file mode 100644
index 00000000..5accb645
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data
@@ -0,0 +1,3 @@
+5
+" $G,A4{xZ/.D`200Bytes.txt
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data
new file mode 100644
index 00000000..c3a2f685
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data
new file mode 100644
index 00000000..a655cf83
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data
@@ -0,0 +1,5 @@
+
+wxxM{
+DzH/&^RS/v,R
+=Ng~pf1\[>%U1@Q׀2&m6qQ]|!KE~J
֕읝ojbn3eT)D+;s
+컓:Ty!c3\*T7E?[Pv}A+cx~e
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data
new file mode 100644
index 00000000..a8f98693
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data
new file mode 100644
index 00000000..6d043733
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data
@@ -0,0 +1,2 @@
+
+rː'Q#
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data
new file mode 100644
index 00000000..1524efce
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data
new file mode 100644
index 00000000..389e1117
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data
@@ -0,0 +1,28 @@
+
+Hello and Welcome to IPFS!
+
+██╗██████╗ ███████╗███████╗
+██║██╔══██╗██╔════╝██╔════╝
+██║██████╔╝█████╗ ███████╗
+██║██╔═══╝ ██╔══╝ ╚════██║
+██║██║ ██║ ███████║
+╚═╝╚═╝ ╚═╝ ╚══════╝
+
+If you're seeing this, you have successfully installed
+IPFS and are now interfacing with the ipfs merkledag!
+
+ -------------------------------------------------------
+| Warning: |
+| This is alpha software. Use at your own discretion! |
+| Much is missing or lacking polish. There are bugs. |
+| Not yet secure. Read the security notes for more. |
+ -------------------------------------------------------
+
+Check out some of the other files in this directory:
+
+ ./about
+ ./help
+ ./quick-start <-- usage examples
+ ./readme <-- this file
+ ./security-notes
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data
new file mode 100644
index 00000000..5a59204a
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data
@@ -0,0 +1,2 @@
+
+stem. Some
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data
new file mode 100644
index 00000000..1a86e0be
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data
new file mode 100644
index 00000000..74f62a02
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data
@@ -0,0 +1,3 @@
+
+
+'Q#
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data
new file mode 100644
index 00000000..3a99c365
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data
@@ -0,0 +1,3 @@
+4
+" Y9_)a˹2RmŖke9js-ipfs-repo
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING
new file mode 100644
index 00000000..a153331d
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING
@@ -0,0 +1 @@
+/repo/flatfs/shard/v1/next-to-last/2
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data
new file mode 100644
index 00000000..38a7ed3a
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data
new file mode 100644
index 00000000..562529a2
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data
new file mode 100644
index 00000000..dedf499f
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data
new file mode 100644
index 00000000..9e5174d0
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data
@@ -0,0 +1,4 @@
+5
+" $G,A4{xZ/.D`200Bytes.txt/
+" Y9_)a˹2RmŖke9level-2
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data
new file mode 100644
index 00000000..5a3836e9
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data
new file mode 100644
index 00000000..a4027d46
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data
new file mode 100644
index 00000000..10aa2ae4
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data
new file mode 100644
index 00000000..c1f9899a
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data
new file mode 100644
index 00000000..4e910622
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data
new file mode 100644
index 00000000..871a6bf0
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data
@@ -0,0 +1,4729 @@
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen significant
+success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings¹,
+no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one perspective, evolving Web
+infrastructure is near-impossible, given the number of backwards-compatibility
+constraints and the number of strong parties invested in the current model.
+(¹ For example, Linux distributions use BitTorrent to transmit disk images,
+and Blizzard, Inc. uses it to distribute video game content.)
+But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume high-definition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?], a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content-addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput-oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer version-controlled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen significant
+success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.
+
+¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
+
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web and introducing new functionality without degrading user experience.
+
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume, high-definition, on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing the accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+
+Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.
+
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
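+
+To make the Merkle DAG idea above concrete, the following is a minimal, illustrative sketch in plain Node.js (using only the built-in crypto module) of content addressing: every node is identified by the hash of its own serialized bytes, and parents link to children only by those hashes. The JSON encoding, SHA-256 digest, and in-memory blockstore are simplifying assumptions for illustration, not the actual IPFS object formats or the API of this package.
+
+```js
+'use strict'
+
+// Toy content-addressed Merkle DAG: a node's "address" is the SHA-256 digest
+// of its serialized bytes, and interior nodes reference children by digest.
+// (Illustrative only -- real IPFS uses multihash-based CIDs and dag-pb/CBOR.)
+const crypto = require('crypto')
+
+// In-memory blockstore, keyed by hex digest.
+const blockstore = new Map()
+
+function putNode (node) {
+  const bytes = Buffer.from(JSON.stringify(node))
+  const hash = crypto.createHash('sha256').update(bytes).digest('hex')
+  blockstore.set(hash, bytes)
+  return hash // derived purely from the node's content
+}
+
+function getNode (hash) {
+  const bytes = blockstore.get(hash)
+  return bytes ? JSON.parse(bytes.toString()) : undefined
+}
+
+// Leaves hold data; the root holds only links, so changing any leaf changes
+// every hash on the path up to the root.
+const leafA = putNode({ data: 'hello' })
+const leafB = putNode({ data: 'world' })
+const root = putNode({ links: [leafA, leafB] })
+
+console.log('root:', root)
+console.log('resolved:', getNode(root).links.map(h => getNode(h).data).join(' '))
+```
+
+Because an address is a pure function of content, identical subtrees deduplicate automatically and any edit yields a new root hash, which is the property that makes the same structure useful for both distribution and versioning.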
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file system has emerged that offers global, low-latency, and decentralized distribution.
+
+¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.
+
+Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.
+
+Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.
+
+Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.
+
+This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
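+
+To make the content-addressed Merkle DAG idea concrete, here is a minimal, illustrative sketch, not the Git or IPFS implementation: every node is named by the hash of its serialized bytes, and a parent links to its children by those hashes, so identical content deduplicates automatically and changing any leaf changes every ancestor identifier. The `contentId` and `createNode` helpers below are hypothetical names introduced only for this sketch.
+
+```js
+'use strict'
+
+const crypto = require('crypto')
+
+// Hypothetical helper: name a blob by the SHA-256 digest of its bytes.
+function contentId (bytes) {
+  return crypto.createHash('sha256').update(bytes).digest('hex')
+}
+
+// Hypothetical node shape: raw data plus links to child node ids.
+// The id is derived purely from the serialized content, never assigned.
+function createNode (data, links = []) {
+  const bytes = Buffer.from(JSON.stringify({ data, links }))
+  return { id: contentId(bytes), data, links }
+}
+
+// Two leaves and a parent that links them: the parent's id commits to the
+// exact contents of both leaves, which is what enables verification and
+// deduplication in content-addressed file distribution.
+const leafA = createNode('hello ')
+const leafB = createNode('world')
+const root = createNode(null, [leafA.id, leafB.id])
+
+console.log(root.id) // deterministic for identical content
+```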
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
+
+
+There have been many attempts at constructing a global
+distributed file system. Some systems have seen signifi-
+cant success, and others failed completely. Among the academic
+attempts, AFS [6] has succeeded widely and is still
+in use today. Others [7, ?] have not attained the same
+success. Outside of academia, the most successful systems
+have been peer-to-peer file-sharing applications primarily
+geared toward large media (audio and video). Most notably,
+Napster, KaZaA, and BitTorrent [2] deployed large
+file distribution systems supporting over 100 million simultaneous
+users. Even today, BitTorrent maintains a massive
+deployment where tens of millions of nodes churn daily [16].
+These applications saw greater numbers of users and files distributed
+than their academic file system counterparts. However,
+the applications were not designed as infrastructure to
+be built upon. While there have been successful repurposings1
+, no general file-system has emerged that offers global,
+low-latency, and decentralized distribution.
+Perhaps this is because a “good enough” system for most
+use cases already exists: HTTP. By far, HTTP is the most
+successful “distributed system of files” ever deployed. Coupled
+with the browser, HTTP has had enormous technical
+and social impact. It has become the de facto way to transmit
+files across the internet. Yet, it fails to take advantage
+of dozens of brilliant file distribution techniques invented in
+the last fifteen years. From one prespective, evolving Web
+infrastructure is near-impossible, given the number of backwards
+compatibility constraints and the number of strong
+1For example, Linux distributions use BitTorrent to transmit
+disk images, and Blizzard, Inc. uses it to distribute
+video game content.
+parties invested in the current model. But from another perspective,
+new protocols have emerged and gained wide use
+since the emergence of HTTP. What is lacking is upgrading
+design: enhancing the current HTTP web, and introducing
+new functionality without degrading user experience.
+Industry has gotten away with using HTTP this long because
+moving small files around is relatively cheap, even for
+small organizations with lots of traffic. But we are entering
+a new era of data distribution with new challenges: (a)
+hosting and distributing petabyte datasets, (b) computing
+on large data across organizations, (c) high-volume highdefinition
+on-demand or real-time media streams, (d) versioning
+and linking of massive datasets, (e) preventing accidental
+disappearance of important files, and more. Many
+of these can be boiled down to “lots of data, accessible everywhere.”
+Pressed by critical features and bandwidth concerns,
+we have already given up HTTP for different data
+distribution protocols. The next step is making them part
+of the Web itself.
+Orthogonal to efficient data distribution, version control
+systems have managed to develop important data collaboration
+workflows. Git, the distributed source code version
+control system, developed many useful ways to model and
+implement distributed data operations. The Git toolchain
+offers versatile versioning functionality that large file distribution
+systems severely lack. New solutions inspired by Git
+are emerging, such as Camlistore [?], a personal file storage
+system, and Dat [?] a data collaboration toolchain
+and dataset package manager. Git has already influenced
+distributed filesystem design [9], as its content addressed
+Merkle DAG data model enables powerful file distribution
+strategies. What remains to be explored is how this data
+structure can influence the design of high-throughput oriented
+file systems, and how it might upgrade the Web itself.
+This paper introduces IPFS, a novel peer-to-peer versioncontrolled
+filesystem seeking to reconcile these issues. IPFS
+synthesizes learnings from many past successful systems.
+Careful interface-focused integration yields a system greater
+than the sum of its parts. The central IPFS principle is
+modeling all data as part of the same Merkle DAG.
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data
new file mode 100644
index 00000000..a6e00f34
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data
new file mode 100644
index 00000000..b6539897
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data
new file mode 100644
index 00000000..6b72d373
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data
@@ -0,0 +1,2 @@
+
+u r[
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data
new file mode 100644
index 00000000..9cda061b
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data
new file mode 100644
index 00000000..7f2f4e92
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data
new file mode 100644
index 00000000..2dd80560
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data
@@ -0,0 +1,114 @@
+
+
+# 0.1 - Quick Start
+
+This is a set of short examples with minimal explanation. It is meant as
+a "quick start". Soon, we'll write a longer tour :-)
+
+
+Add a file to ipfs:
+
+ echo "hello world" >hello
+ ipfs add hello
+
+
+View it:
+
+ ipfs cat
+
+
+Try a directory:
+
+ mkdir foo
+ mkdir foo/bar
+ echo "baz" > foo/baz
+ echo "baz" > foo/bar/baz
+ ipfs add -r foo
+
+
+View things:
+
+ ipfs ls
+ ipfs ls /bar
+ ipfs cat /baz
+ ipfs cat /bar/baz
+ ipfs cat /bar
+ ipfs ls /baz
+
+
+References:
+
+ ipfs refs
+ ipfs refs -r
+ ipfs refs --help
+
+
+Get:
+
+ ipfs get foo2
+ diff foo foo2
+
+
+Objects:
+
+ ipfs object get
+ ipfs object get /foo2
+ ipfs object --help
+
+
+Pin + GC:
+
+ ipfs pin -r
+ ipfs gc
+ ipfs ls
+ ipfs unpin -r
+ ipfs gc
+
+
+Daemon:
+
+ ipfs daemon (in another terminal)
+ ipfs id
+
+
+Network:
+
+ (must be online)
+ ipfs swarm peers
+ ipfs id
+ ipfs cat
+
+
+Mount:
+
+ (warning: fuse is finicky!)
+ ipfs mount
+ cd /ipfs/<
+
+
+Tool:
+
+ ipfs version
+ ipfs update
+ ipfs commands
+ ipfs config --help
+ open http://localhost:5001/webui
+
+
+Browse:
+
+ webui:
+
+ http://localhost:5001/webui
+
+ video:
+
+ http://localhost:8080/ipfs/QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu/play#/ipfs/QmTKZgRNwDNZwHtJSjCp6r5FYefzpULfy37JvMt9DwvXse
+
+ images:
+
+ http://localhost:8080/ipfs/QmZpc3HvfjEXvLWGQPWbHk3AjD5j8NEN4gmFN8Jmrd5g83/cs
+
+ markdown renderer app:
+
+ http://localhost:8080/ipfs/QmX7M9CiYXjVeFnkfVGf3y5ixTZ2ACeSGyL1vBJY1HvQPp/mdown
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data
new file mode 100644
index 00000000..64ce0aeb
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data
new file mode 100644
index 00000000..81663143
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data
new file mode 100644
index 00000000..b75d8023
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data
new file mode 100644
index 00000000..9553a942
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data
@@ -0,0 +1,2 @@
+
+
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data
new file mode 100644
index 00000000..e80dbd9a
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data
new file mode 100644
index 00000000..d899663b
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data
new file mode 100644
index 00000000..ba0caf40
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data
new file mode 100644
index 00000000..1d48c015
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data
new file mode 100644
index 00000000..b1df8c51
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data
new file mode 100644
index 00000000..b0ac590e
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data
new file mode 100644
index 00000000..3b40300d
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data
new file mode 100644
index 00000000..819ec6cf
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data
new file mode 100644
index 00000000..c57d7186
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README
new file mode 100644
index 00000000..23cb0909
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README
@@ -0,0 +1,30 @@
+This is a repository of IPLD objects. Each IPLD object is in a single file,
+named <base32 encoding of CID>.data, where <base32 encoding of CID> is the
+"base32" encoding of the CID (as specified in
+https://github.com/multiformats/multibase) without the 'B' prefix.
+All the object files are placed in a tree of directories, based on a
+function of the CID. This is a form of sharding similar to
+the objects directory in git repositories. Previously we used
+prefixes; we now use the next-to-last two characters.
+
+    func NextToLast(base32cid string) string {
+        nextToLastLen := 2
+        offset := len(base32cid) - nextToLastLen - 1
+        return base32cid[offset : offset+nextToLastLen]
+    }
+
+For example, an object with a base58 CIDv1 of
+
+ zb2rhYSxw4ZjuzgCnWSt19Q94ERaeFhu9uSqRgjSdx9bsgM6f
+
+has a base32 CIDv1 of
+
+ BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA
+
+and will be placed at
+
+ SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data
+
+with 'SC' being the next-to-last two characters. The 'B' at the
+beginning of the base32 CIDv1 string is the multibase prefix and is not
+stored in the filename.
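+
+(Illustrative aside, not part of the original README: a minimal JavaScript
+sketch of the same next-to-last-two-characters sharding; the function name
+`shardPath` is hypothetical.)
+
+    'use strict'
+
+    // Given a base32 CIDv1 string including its leading 'B' multibase prefix,
+    // return the shard-directory/file-name path used for the block.
+    function shardPath (base32Cid) {
+      const key = base32Cid.slice(1) // the 'B' prefix is not stored on disk
+      const nextToLast = key.slice(-3, -1) // the two characters before the last one
+      return `${nextToLast}/${key}.data`
+    }
+
+    // shardPath('BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA')
+    // => 'SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data'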
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/config b/packages/ipfs-unixfs-importer/test/test-repo/config
new file mode 100644
index 00000000..cbcdfe3b
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/config
@@ -0,0 +1 @@
+{"Identity":{"PeerID":"QmQ2zigjQikYnyYUSXZydNXrDRhBut2mubwJBaLXobMt3A","PrivKey":"CAASpgkwggSiAgEAAoIBAQC2SKo/HMFZeBml1AF3XijzrxrfQXdJzjePBZAbdxqKR1Mc6juRHXij6HXYPjlAk01BhF1S3Ll4Lwi0cAHhggf457sMg55UWyeGKeUv0ucgvCpBwlR5cQ020i0MgzjPWOLWq1rtvSbNcAi2ZEVn6+Q2EcHo3wUvWRtLeKz+DZSZfw2PEDC+DGPJPl7f8g7zl56YymmmzH9liZLNrzg/qidokUv5u1pdGrcpLuPNeTODk0cqKB+OUbuKj9GShYECCEjaybJDl9276oalL9ghBtSeEv20kugatTvYy590wFlJkkvyl+nPxIH0EEYMKK9XRWlu9XYnoSfboiwcv8M3SlsjAgMBAAECggEAZtju/bcKvKFPz0mkHiaJcpycy9STKphorpCT83srBVQi59CdFU6Mj+aL/xt0kCPMVigJw8P3/YCEJ9J+rS8BsoWE+xWUEsJvtXoT7vzPHaAtM3ci1HZd302Mz1+GgS8Epdx+7F5p80XAFLDUnELzOzKftvWGZmWfSeDnslwVONkL/1VAzwKy7Ce6hk4SxRE7l2NE2OklSHOzCGU1f78ZzVYKSnS5Ag9YrGjOAmTOXDbKNKN/qIorAQ1bovzGoCwx3iGIatQKFOxyVCyO1PsJYT7JO+kZbhBWRRE+L7l+ppPER9bdLFxs1t5CrKc078h+wuUr05S1P1JjXk68pk3+kQKBgQDeK8AR11373Mzib6uzpjGzgNRMzdYNuExWjxyxAzz53NAR7zrPHvXvfIqjDScLJ4NcRO2TddhXAfZoOPVH5k4PJHKLBPKuXZpWlookCAyENY7+Pd55S8r+a+MusrMagYNljb5WbVTgN8cgdpim9lbbIFlpN6SZaVjLQL3J8TWH6wKBgQDSChzItkqWX11CNstJ9zJyUE20I7LrpyBJNgG1gtvz3ZMUQCn3PxxHtQzN9n1P0mSSYs+jBKPuoSyYLt1wwe10/lpgL4rkKWU3/m1Myt0tveJ9WcqHh6tzcAbb/fXpUFT/o4SWDimWkPkuCb+8j//2yiXk0a/T2f36zKMuZvujqQKBgC6B7BAQDG2H2B/ijofp12ejJU36nL98gAZyqOfpLJ+FeMz4TlBDQ+phIMhnHXA5UkdDapQ+zA3SrFk+6yGk9Vw4Hf46B+82SvOrSbmnMa+PYqKYIvUzR4gg34rL/7AhwnbEyD5hXq4dHwMNsIDq+l2elPjwm/U9V0gdAl2+r50HAoGALtsKqMvhv8HucAMBPrLikhXP/8um8mMKFMrzfqZ+otxfHzlhI0L08Bo3jQrb0Z7ByNY6M8epOmbCKADsbWcVre/AAY0ZkuSZK/CaOXNX/AhMKmKJh8qAOPRY02LIJRBCpfS4czEdnfUhYV/TYiFNnKRj57PPYZdTzUsxa/yVTmECgYBr7slQEjb5Onn5mZnGDh+72BxLNdgwBkhO0OCdpdISqk0F0Pxby22DFOKXZEpiyI9XYP1C8wPiJsShGm2yEwBPWXnrrZNWczaVuCbXHrZkWQogBDG3HGXNdU4MAWCyiYlyinIBpPpoAJZSzpGLmWbMWh28+RJS6AQX6KHrK1o2uw=="},"Datastore":{"Type":"","Path":"","StorageMax":"","StorageGCWatermark":0,"GCPeriod":"","Params":null,"NoSync":false},"Addresses":{"Swarm":["/ip4/0.0.0.0/tcp/4001","/ip6/::/tcp/4001"],"API":"/ip4/127.0.0.1/tcp/5001","Gateway":"/ip4/127.0.0.1/tcp/8080"},"Mounts":{"IPFS":"/ipfs","IPNS":"/ipns","FuseAllowOther":false},"Version":{"Current":"0.4.0-dev","Check":"error","CheckDate":"0001-01-01T00:00:00Z","CheckPeriod":"172800000000000","AutoUpdate":"minor"},"Discovery":{"MDNS":{"Enabled":true,"Interval":10}},"Ipns":{"RepublishPeriod":"","RecordLifetime":"","ResolveCacheSize":128},"Bootstrap":["/ip4/104.131.131.82/tcp/4001/ipfs/QmaCpDMGvV2BGHeYERUEnRQAwe3N8SzbUtfsmvsqQLuvuJ","/ip4/104.236.176.52/tcp/4001/ipfs/QmSoLnSGccFuZQJzRadHn95W2CrSFmZuTdDWP8HXaHca9z","/ip4/104.236.179.241/tcp/4001/ipfs/QmSoLPppuBtQSGwKDZT2M73ULpjvfd3aZ6ha4oFGL1KrGM","/ip4/162.243.248.213/tcp/4001/ipfs/QmSoLueR4xBeUbY9WZ9xGUUxunbKWcrNFTDAadQJmocnWm","/ip4/128.199.219.111/tcp/4001/ipfs/QmSoLSafTMBsPKadTEgaXctDQVcqN88CNLHXMkTNwMKPnu","/ip4/104.236.76.40/tcp/4001/ipfs/QmSoLV4Bbm51jM9C4gDYZQ9Cy3U6aXMJDAbzgu2fzaDs64","/ip4/178.62.158.247/tcp/4001/ipfs/QmSoLer265NRgSp2LA3dPaeykiS1J6DifTC88f5uVQKNAd","/ip4/178.62.61.185/tcp/4001/ipfs/QmSoLMeWqB7YGVLJN3pNLQpmmEk35v6wYtsMGLzSr5QBU3","/ip4/104.236.151.122/tcp/4001/ipfs/QmSoLju6m7xTh3DuokvT3886QRYqxAzb1kShaanJgW36yx"],"Tour":{"Last":""},"Gateway":{"HTTPHeaders":null,"RootRedirect":"","Writable":false},"SupernodeRouting":{"Servers":["/ip4/104.236.176.52/tcp/4002/ipfs/QmXdb7tWTxdFEQEFgWBqkuYSrZd3mXrC7HxkD4krGNYx2U","/ip4/104.236.179.241/tcp/4002/ipfs/QmVRqViDByUxjUMoPnjurjKvZhaEMFDtK35FJXHAM4Lkj6","/ip4/104.236.151.122/tcp/4002/ipfs/QmSZwGx8Tn8tmcM4PtDJaMeUQNRhNFdBLVGPzRiNaRJtFH","/ip4/162.243.248.213/tcp/4002/ipfs/QmbHVEEepCi7rn7VL7Exxpd2Ci9NNB6ifvqwhsrbRMgQFP","/ip4/128.199.219.111/tcp/4002/ipfs/Qmb3brdCYmKG1ycwqCbo6LUwWxTuo3FisnJV2yir7oN92
R","/ip4/104.236.76.40/tcp/4002/ipfs/QmdRBCV8Cz2dGhoKLkD3YjPwVFECmqADQkx5ZteF2c6Fy4","/ip4/178.62.158.247/tcp/4002/ipfs/QmUdiMPci7YoEUBkyFZAh2pAbjqcPr7LezyiPD2artLw3v","/ip4/178.62.61.185/tcp/4002/ipfs/QmVw6fGNqBixZE4bewRLT2VXX7fAHUHs8JyidDiJ1P7RUN"]},"API":{"HTTPHeaders":null},"Swarm":{"AddrFilters":null},"Log":{"MaxSizeMB":250,"MaxBackups":1,"MaxAgeDays":0}}
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb
new file mode 100644
index 00000000..fc04d660
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb
new file mode 100644
index 00000000..63d9d260
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT
new file mode 100644
index 00000000..5b540107
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT
@@ -0,0 +1 @@
+MANIFEST-000011
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK
new file mode 100644
index 00000000..e69de29b
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG
new file mode 100644
index 00000000..fb2ef830
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG
@@ -0,0 +1,5 @@
+=============== Aug 19, 2016 (CEST) ===============
+15:48:10.633634 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed
+15:48:10.634191 db@open opening
+15:48:10.639318 db@janitor F·4 G·0
+15:48:10.639379 db@open done T·5.16729ms
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old
new file mode 100644
index 00000000..f5ffd612
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old
@@ -0,0 +1,7 @@
+=============== Apr 22, 2016 (WEST) ===============
+03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed
+03:16:42.272857 db@open opening
+03:16:42.275673 db@janitor F·4 G·0
+03:16:42.275700 db@open done T·2.831108ms
+03:16:42.596938 db@close closing
+03:16:42.597082 db@close done T·139.194µs
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011
new file mode 100644
index 00000000..7af87ca8
Binary files /dev/null and b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/version b/packages/ipfs-unixfs-importer/test/test-repo/version
new file mode 100644
index 00000000..1e8b3149
--- /dev/null
+++ b/packages/ipfs-unixfs-importer/test/test-repo/version
@@ -0,0 +1 @@
+6